]> gcc.gnu.org Git - gcc.git/blame - gcc/config/i386/i386.c
* config/m68k/m68k.c (m68k_output_mi_thunk): Fix typo.
[gcc.git] / gcc / config / i386 / i386.c
CommitLineData
e075ae69 1/* Subroutines used for code generation on IA-32.
07933f72
GS
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
2a2ab3f9
JVA
4
5This file is part of GNU CC.
6
7GNU CC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
12GNU CC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU CC; see the file COPYING. If not, write to
97aadbb9 19the Free Software Foundation, 59 Temple Place - Suite 330,
0f290768 20Boston, MA 02111-1307, USA. */
2a2ab3f9 21
2a2ab3f9 22#include "config.h"
bb5177ac 23#include "system.h"
2a2ab3f9 24#include "rtl.h"
6baf1cc8
BS
25#include "tree.h"
26#include "tm_p.h"
2a2ab3f9
JVA
27#include "regs.h"
28#include "hard-reg-set.h"
29#include "real.h"
30#include "insn-config.h"
31#include "conditions.h"
2a2ab3f9
JVA
32#include "output.h"
33#include "insn-attr.h"
2a2ab3f9 34#include "flags.h"
a8ffcc81 35#include "except.h"
ecbc4695 36#include "function.h"
00c79232 37#include "recog.h"
ced8dd8c 38#include "expr.h"
e78d8e51 39#include "optabs.h"
f103890b 40#include "toplev.h"
e075ae69 41#include "basic-block.h"
1526a060 42#include "ggc.h"
672a6f42
NB
43#include "target.h"
44#include "target-def.h"
f1e639b1 45#include "langhooks.h"
2a2ab3f9 46
8dfe5673 47#ifndef CHECK_STACK_LIMIT
07933f72 48#define CHECK_STACK_LIMIT (-1)
8dfe5673
RK
49#endif
50
2ab0437e 51/* Processor costs (relative to an add) */
fce5a9f2 52static const
2ab0437e
JH
53struct processor_costs size_cost = { /* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
44cf5b6a
JH
61 3, /* cost of movsx */
62 3, /* cost of movzx */
2ab0437e
JH
63 0, /* "large" insn */
64 2, /* MOVE_RATIO */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of storing fp registers */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
f4365627
JH
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
229b303a
RS
87 2, /* cost of FADD and FSUB insns. */
88 2, /* cost of FMUL instruction. */
89 2, /* cost of FDIV instruction. */
90 2, /* cost of FABS instruction. */
91 2, /* cost of FCHS instruction. */
92 2, /* cost of FSQRT instruction. */
2ab0437e 93};
229b303a 94
32b5b1aa 95/* Processor costs (relative to an add) */
fce5a9f2 96static const
32b5b1aa 97struct processor_costs i386_cost = { /* 386 specific costs */
e9a25f70 98 1, /* cost of an add instruction */
32b5b1aa
SC
99 1, /* cost of a lea instruction */
100 3, /* variable shift costs */
101 2, /* constant shift costs */
102 6, /* cost of starting a multiply */
103 1, /* cost of multiply per each bit set */
e075ae69 104 23, /* cost of a divide/mod */
44cf5b6a
JH
105 3, /* cost of movsx */
106 2, /* cost of movzx */
96e7ae40 107 15, /* "large" insn */
e2e52e1b 108 3, /* MOVE_RATIO */
7c6b971d 109 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
110 {2, 4, 2}, /* cost of loading integer registers
111 in QImode, HImode and SImode.
0f290768 112 Relative to reg-reg move (2). */
96e7ae40
JH
113 {2, 4, 2}, /* cost of storing integer registers */
114 2, /* cost of reg,reg fld/fst */
115 {8, 8, 8}, /* cost of loading fp registers
116 in SFmode, DFmode and XFmode */
fa79946e
JH
117 {8, 8, 8}, /* cost of storing fp registers */
118 2, /* cost of moving MMX register */
119 {4, 8}, /* cost of loading MMX registers
120 in SImode and DImode */
121 {4, 8}, /* cost of storing MMX registers
122 in SImode and DImode */
123 2, /* cost of moving SSE register */
124 {4, 8, 16}, /* cost of loading SSE registers
125 in SImode, DImode and TImode */
126 {4, 8, 16}, /* cost of storing SSE registers
127 in SImode, DImode and TImode */
128 3, /* MMX or SSE register to integer */
f4365627
JH
129 0, /* size of prefetch block */
130 0, /* number of parallel prefetches */
229b303a
RS
131 23, /* cost of FADD and FSUB insns. */
132 27, /* cost of FMUL instruction. */
133 88, /* cost of FDIV instruction. */
134 22, /* cost of FABS instruction. */
135 24, /* cost of FCHS instruction. */
136 122, /* cost of FSQRT instruction. */
32b5b1aa
SC
137};
138
fce5a9f2 139static const
32b5b1aa
SC
140struct processor_costs i486_cost = { /* 486 specific costs */
141 1, /* cost of an add instruction */
142 1, /* cost of a lea instruction */
143 3, /* variable shift costs */
144 2, /* constant shift costs */
145 12, /* cost of starting a multiply */
146 1, /* cost of multiply per each bit set */
e075ae69 147 40, /* cost of a divide/mod */
44cf5b6a
JH
148 3, /* cost of movsx */
149 2, /* cost of movzx */
96e7ae40 150 15, /* "large" insn */
e2e52e1b 151 3, /* MOVE_RATIO */
7c6b971d 152 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
0f290768 155 Relative to reg-reg move (2). */
96e7ae40
JH
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
fa79946e
JH
160 {8, 8, 8}, /* cost of storing fp registers */
161 2, /* cost of moving MMX register */
162 {4, 8}, /* cost of loading MMX registers
163 in SImode and DImode */
164 {4, 8}, /* cost of storing MMX registers
165 in SImode and DImode */
166 2, /* cost of moving SSE register */
167 {4, 8, 16}, /* cost of loading SSE registers
168 in SImode, DImode and TImode */
169 {4, 8, 16}, /* cost of storing SSE registers
170 in SImode, DImode and TImode */
f4365627
JH
171 3, /* MMX or SSE register to integer */
172 0, /* size of prefetch block */
173 0, /* number of parallel prefetches */
229b303a
RS
174 8, /* cost of FADD and FSUB insns. */
175 16, /* cost of FMUL instruction. */
176 73, /* cost of FDIV instruction. */
177 3, /* cost of FABS instruction. */
178 3, /* cost of FCHS instruction. */
179 83, /* cost of FSQRT instruction. */
32b5b1aa
SC
180};
181
fce5a9f2 182static const
e5cb57e8 183struct processor_costs pentium_cost = {
32b5b1aa
SC
184 1, /* cost of an add instruction */
185 1, /* cost of a lea instruction */
856b07a1 186 4, /* variable shift costs */
e5cb57e8 187 1, /* constant shift costs */
856b07a1
SC
188 11, /* cost of starting a multiply */
189 0, /* cost of multiply per each bit set */
e075ae69 190 25, /* cost of a divide/mod */
44cf5b6a
JH
191 3, /* cost of movsx */
192 2, /* cost of movzx */
96e7ae40 193 8, /* "large" insn */
e2e52e1b 194 6, /* MOVE_RATIO */
7c6b971d 195 6, /* cost for loading QImode using movzbl */
96e7ae40
JH
196 {2, 4, 2}, /* cost of loading integer registers
197 in QImode, HImode and SImode.
0f290768 198 Relative to reg-reg move (2). */
96e7ae40
JH
199 {2, 4, 2}, /* cost of storing integer registers */
200 2, /* cost of reg,reg fld/fst */
201 {2, 2, 6}, /* cost of loading fp registers
202 in SFmode, DFmode and XFmode */
fa79946e
JH
203 {4, 4, 6}, /* cost of storing fp registers */
204 8, /* cost of moving MMX register */
205 {8, 8}, /* cost of loading MMX registers
206 in SImode and DImode */
207 {8, 8}, /* cost of storing MMX registers
208 in SImode and DImode */
209 2, /* cost of moving SSE register */
210 {4, 8, 16}, /* cost of loading SSE registers
211 in SImode, DImode and TImode */
212 {4, 8, 16}, /* cost of storing SSE registers
213 in SImode, DImode and TImode */
f4365627
JH
214 3, /* MMX or SSE register to integer */
215 0, /* size of prefetch block */
216 0, /* number of parallel prefetches */
229b303a
RS
217 3, /* cost of FADD and FSUB insns. */
218 3, /* cost of FMUL instruction. */
219 39, /* cost of FDIV instruction. */
220 1, /* cost of FABS instruction. */
221 1, /* cost of FCHS instruction. */
222 70, /* cost of FSQRT instruction. */
32b5b1aa
SC
223};
224
fce5a9f2 225static const
856b07a1
SC
226struct processor_costs pentiumpro_cost = {
227 1, /* cost of an add instruction */
228 1, /* cost of a lea instruction */
e075ae69 229 1, /* variable shift costs */
856b07a1 230 1, /* constant shift costs */
369e59b1 231 4, /* cost of starting a multiply */
856b07a1 232 0, /* cost of multiply per each bit set */
e075ae69 233 17, /* cost of a divide/mod */
44cf5b6a
JH
234 1, /* cost of movsx */
235 1, /* cost of movzx */
96e7ae40 236 8, /* "large" insn */
e2e52e1b 237 6, /* MOVE_RATIO */
7c6b971d 238 2, /* cost for loading QImode using movzbl */
96e7ae40
JH
239 {4, 4, 4}, /* cost of loading integer registers
240 in QImode, HImode and SImode.
0f290768 241 Relative to reg-reg move (2). */
96e7ae40
JH
242 {2, 2, 2}, /* cost of storing integer registers */
243 2, /* cost of reg,reg fld/fst */
244 {2, 2, 6}, /* cost of loading fp registers
245 in SFmode, DFmode and XFmode */
fa79946e
JH
246 {4, 4, 6}, /* cost of storing fp registers */
247 2, /* cost of moving MMX register */
248 {2, 2}, /* cost of loading MMX registers
249 in SImode and DImode */
250 {2, 2}, /* cost of storing MMX registers
251 in SImode and DImode */
252 2, /* cost of moving SSE register */
253 {2, 2, 8}, /* cost of loading SSE registers
254 in SImode, DImode and TImode */
255 {2, 2, 8}, /* cost of storing SSE registers
256 in SImode, DImode and TImode */
f4365627
JH
257 3, /* MMX or SSE register to integer */
258 32, /* size of prefetch block */
259 6, /* number of parallel prefetches */
229b303a
RS
260 3, /* cost of FADD and FSUB insns. */
261 5, /* cost of FMUL instruction. */
262 56, /* cost of FDIV instruction. */
263 2, /* cost of FABS instruction. */
264 2, /* cost of FCHS instruction. */
265 56, /* cost of FSQRT instruction. */
856b07a1
SC
266};
267
fce5a9f2 268static const
a269a03c
JC
269struct processor_costs k6_cost = {
270 1, /* cost of an add instruction */
e075ae69 271 2, /* cost of a lea instruction */
a269a03c
JC
272 1, /* variable shift costs */
273 1, /* constant shift costs */
73fe76e4 274 3, /* cost of starting a multiply */
a269a03c 275 0, /* cost of multiply per each bit set */
e075ae69 276 18, /* cost of a divide/mod */
44cf5b6a
JH
277 2, /* cost of movsx */
278 2, /* cost of movzx */
96e7ae40 279 8, /* "large" insn */
e2e52e1b 280 4, /* MOVE_RATIO */
7c6b971d 281 3, /* cost for loading QImode using movzbl */
96e7ae40
JH
282 {4, 5, 4}, /* cost of loading integer registers
283 in QImode, HImode and SImode.
0f290768 284 Relative to reg-reg move (2). */
96e7ae40
JH
285 {2, 3, 2}, /* cost of storing integer registers */
286 4, /* cost of reg,reg fld/fst */
287 {6, 6, 6}, /* cost of loading fp registers
288 in SFmode, DFmode and XFmode */
fa79946e
JH
289 {4, 4, 4}, /* cost of storing fp registers */
290 2, /* cost of moving MMX register */
291 {2, 2}, /* cost of loading MMX registers
292 in SImode and DImode */
293 {2, 2}, /* cost of storing MMX registers
294 in SImode and DImode */
295 2, /* cost of moving SSE register */
296 {2, 2, 8}, /* cost of loading SSE registers
297 in SImode, DImode and TImode */
298 {2, 2, 8}, /* cost of storing SSE registers
299 in SImode, DImode and TImode */
f4365627
JH
300 6, /* MMX or SSE register to integer */
301 32, /* size of prefetch block */
302 1, /* number of parallel prefetches */
229b303a
RS
303 2, /* cost of FADD and FSUB insns. */
304 2, /* cost of FMUL instruction. */
4f770e7b
RS
305 56, /* cost of FDIV instruction. */
306 2, /* cost of FABS instruction. */
229b303a
RS
307 2, /* cost of FCHS instruction. */
308 56, /* cost of FSQRT instruction. */
a269a03c
JC
309};
310
fce5a9f2 311static const
309ada50
JH
312struct processor_costs athlon_cost = {
313 1, /* cost of an add instruction */
0b5107cf 314 2, /* cost of a lea instruction */
309ada50
JH
315 1, /* variable shift costs */
316 1, /* constant shift costs */
317 5, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
0b5107cf 319 42, /* cost of a divide/mod */
44cf5b6a
JH
320 1, /* cost of movsx */
321 1, /* cost of movzx */
309ada50 322 8, /* "large" insn */
e2e52e1b 323 9, /* MOVE_RATIO */
309ada50 324 4, /* cost for loading QImode using movzbl */
b72b1c29 325 {3, 4, 3}, /* cost of loading integer registers
309ada50 326 in QImode, HImode and SImode.
0f290768 327 Relative to reg-reg move (2). */
b72b1c29 328 {3, 4, 3}, /* cost of storing integer registers */
309ada50 329 4, /* cost of reg,reg fld/fst */
b72b1c29 330 {4, 4, 12}, /* cost of loading fp registers
309ada50 331 in SFmode, DFmode and XFmode */
b72b1c29 332 {6, 6, 8}, /* cost of storing fp registers */
fa79946e 333 2, /* cost of moving MMX register */
b72b1c29 334 {4, 4}, /* cost of loading MMX registers
fa79946e 335 in SImode and DImode */
b72b1c29 336 {4, 4}, /* cost of storing MMX registers
fa79946e
JH
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
b72b1c29 339 {4, 4, 6}, /* cost of loading SSE registers
fa79946e 340 in SImode, DImode and TImode */
b72b1c29 341 {4, 4, 5}, /* cost of storing SSE registers
fa79946e 342 in SImode, DImode and TImode */
b72b1c29 343 5, /* MMX or SSE register to integer */
f4365627
JH
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
229b303a
RS
346 4, /* cost of FADD and FSUB insns. */
347 4, /* cost of FMUL instruction. */
348 24, /* cost of FDIV instruction. */
349 2, /* cost of FABS instruction. */
350 2, /* cost of FCHS instruction. */
351 35, /* cost of FSQRT instruction. */
309ada50
JH
352};
353
fce5a9f2 354static const
b4e89e2d
JH
355struct processor_costs pentium4_cost = {
356 1, /* cost of an add instruction */
357 1, /* cost of a lea instruction */
358 8, /* variable shift costs */
359 8, /* constant shift costs */
360 30, /* cost of starting a multiply */
361 0, /* cost of multiply per each bit set */
362 112, /* cost of a divide/mod */
44cf5b6a
JH
363 1, /* cost of movsx */
364 1, /* cost of movzx */
b4e89e2d
JH
365 16, /* "large" insn */
366 6, /* MOVE_RATIO */
367 2, /* cost for loading QImode using movzbl */
368 {4, 5, 4}, /* cost of loading integer registers
369 in QImode, HImode and SImode.
370 Relative to reg-reg move (2). */
371 {2, 3, 2}, /* cost of storing integer registers */
372 2, /* cost of reg,reg fld/fst */
373 {2, 2, 6}, /* cost of loading fp registers
374 in SFmode, DFmode and XFmode */
375 {4, 4, 6}, /* cost of storing fp registers */
376 2, /* cost of moving MMX register */
377 {2, 2}, /* cost of loading MMX registers
378 in SImode and DImode */
379 {2, 2}, /* cost of storing MMX registers
380 in SImode and DImode */
381 12, /* cost of moving SSE register */
382 {12, 12, 12}, /* cost of loading SSE registers
383 in SImode, DImode and TImode */
384 {2, 2, 8}, /* cost of storing SSE registers
385 in SImode, DImode and TImode */
386 10, /* MMX or SSE register to integer */
f4365627
JH
387 64, /* size of prefetch block */
388 6, /* number of parallel prefetches */
229b303a
RS
389 5, /* cost of FADD and FSUB insns. */
390 7, /* cost of FMUL instruction. */
391 43, /* cost of FDIV instruction. */
392 2, /* cost of FABS instruction. */
393 2, /* cost of FCHS instruction. */
394 43, /* cost of FSQRT instruction. */
b4e89e2d
JH
395};
396
8b60264b 397const struct processor_costs *ix86_cost = &pentium_cost;
32b5b1aa 398
a269a03c
JC
399/* Processor feature/optimization bitmasks. */
400#define m_386 (1<<PROCESSOR_I386)
401#define m_486 (1<<PROCESSOR_I486)
402#define m_PENT (1<<PROCESSOR_PENTIUM)
403#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
404#define m_K6 (1<<PROCESSOR_K6)
309ada50 405#define m_ATHLON (1<<PROCESSOR_ATHLON)
b4e89e2d 406#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
a269a03c 407
309ada50 408const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
b4e89e2d 409const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
a269a03c 410const int x86_zero_extend_with_and = m_486 | m_PENT;
b4e89e2d 411const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
e075ae69 412const int x86_double_with_add = ~m_386;
a269a03c 413const int x86_use_bit_test = m_386;
e2e52e1b 414const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
b4e89e2d 415const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
47f339cf 416const int x86_3dnow_a = m_ATHLON;
b4e89e2d 417const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
ef6257cd 418const int x86_branch_hints = m_PENT4;
b4e89e2d 419const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
e075ae69
RH
420const int x86_partial_reg_stall = m_PPRO;
421const int x86_use_loop = m_K6;
309ada50 422const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
e075ae69
RH
423const int x86_use_mov0 = m_K6;
424const int x86_use_cltd = ~(m_PENT | m_K6);
425const int x86_read_modify_write = ~m_PENT;
426const int x86_read_modify = ~(m_PENT | m_PPRO);
427const int x86_split_long_moves = m_PPRO;
285464d0
JH
428const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
429const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
b4e89e2d 430const int x86_single_stringop = m_386 | m_PENT4;
d9f32422
JH
431const int x86_qimode_math = ~(0);
432const int x86_promote_qi_regs = 0;
433const int x86_himode_math = ~(m_PPRO);
434const int x86_promote_hi_regs = m_PPRO;
b4e89e2d
JH
435const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
436const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
437const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
438const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
77966be3 439const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
b4e89e2d
JH
440const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
441const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
c6036a37
JH
442const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
443const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
444const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
b972dd02 445const int x86_decompose_lea = m_PENT4;
495333a6 446const int x86_shift1 = ~m_486;
285464d0 447const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
a269a03c 448
6ab16dd9
JH
449/* In case the average insn count for single function invocation is
450 lower than this constant, emit fast (but longer) prologue and
451 epilogue code. */
452#define FAST_PROLOGUE_INSN_COUNT 30
5bf0ebab 453
6ab16dd9
JH
454/* Set by prologue expander and used by epilogue expander to determine
455 the style used. */
456static int use_fast_prologue_epilogue;
457
5bf0ebab
RH
458/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
459static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
460static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
461static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
4c0d89b5
RS
462
463/* Array of the smallest class containing reg number REGNO, indexed by
0f290768 464 REGNO. Used by REGNO_REG_CLASS in i386.h. */
4c0d89b5 465
e075ae69 466enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
4c0d89b5
RS
467{
468 /* ax, dx, cx, bx */
ab408a86 469 AREG, DREG, CREG, BREG,
4c0d89b5 470 /* si, di, bp, sp */
e075ae69 471 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
4c0d89b5
RS
472 /* FP registers */
473 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
79325812 474 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
4c0d89b5 475 /* arg pointer */
83774849 476 NON_Q_REGS,
564d80f4 477 /* flags, fpsr, dirflag, frame */
a7180f70
BS
478 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
/* SSE registers */
479 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
480 SSE_REGS, SSE_REGS,
/* MMX registers */
481 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
3d117b30
JH
482 MMX_REGS, MMX_REGS,
/* extended integer registers */
483 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
484 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
/* extended SSE registers */
485 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
486 SSE_REGS, SSE_REGS,
4c0d89b5 487};
c572e5ba 488
3d117b30 489/* The "default" register map used in 32bit mode. */
83774849 490
0f290768 491int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
492{
493 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
494 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
3d117b30 495 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
496 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
497 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
3d117b30
JH
498 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
499 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
500};
501
5bf0ebab
RH
/* Register numbers of the six x86-64 integer parameter registers, listed
   as RDI, RSI, RDX, RCX, R8, R9.  The numbers use the same ordering as
   regclass_map (ax = 0, dx = 1, cx = 2, bx = 3, si = 4, di = 5); R8 and
   R9 are expressed relative to FIRST_REX_INT_REG.
   NOTE(review): presumably this is also the order in which integer
   arguments are assigned -- confirm against the users of this table.  */
502static int const x86_64_int_parameter_registers[6] =
503{
504 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
505 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
506};
507
/* Register numbers listed for x86-64 integer return values: RAX, RDX,
   RDI, RSI.  The numbers use the same ordering as regclass_map
   (ax = 0, dx = 1, si = 4, di = 5).  */
508static int const x86_64_int_return_registers[4] =
509{
510 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
511};
53c17031 512
0f7fa3d0
JH
513/* The "default" register map used in 64bit mode. */
514int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
515{
516 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
3de72741 517 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
0f7fa3d0
JH
518 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
519 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
520 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
521 8,9,10,11,12,13,14,15, /* extended integer registers */
522 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
523};
524
83774849
RH
525/* Define the register numbers to be used in Dwarf debugging information.
526 The SVR4 reference port C compiler uses the following register numbers
527 in its Dwarf output code:
528 0 for %eax (gcc regno = 0)
529 1 for %ecx (gcc regno = 2)
530 2 for %edx (gcc regno = 1)
531 3 for %ebx (gcc regno = 3)
532 4 for %esp (gcc regno = 7)
533 5 for %ebp (gcc regno = 6)
534 6 for %esi (gcc regno = 4)
535 7 for %edi (gcc regno = 5)
536 The following three DWARF register numbers are never generated by
537 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
538 believes these numbers have these meanings.
539 8 for %eip (no gcc equivalent)
540 9 for %eflags (gcc regno = 17)
541 10 for %trapno (no gcc equivalent)
542 It is not at all clear how we should number the FP stack registers
543 for the x86 architecture. If the version of SDB on x86/svr4 were
544 a bit less brain dead with respect to floating-point then we would
545 have a precedent to follow with respect to DWARF register numbers
546 for x86 FP registers, but the SDB on x86/svr4 is so completely
547 broken with respect to FP registers that it is hardly worth thinking
548 of it as something to strive for compatibility with.
549 The version of x86/svr4 SDB I have at the moment does (partially)
550 seem to believe that DWARF register number 11 is associated with
551 the x86 register %st(0), but that's about all. Higher DWARF
552 register numbers don't seem to be associated with anything in
553 particular, and even for DWARF regno 11, SDB only seems to under-
554 stand that it should say that a variable lives in %st(0) (when
555 asked via an `=' command) if we said it was in DWARF regno 11,
556 but SDB still prints garbage when asked for the value of the
557 variable in question (via a `/' command).
558 (Also note that the labels SDB prints for various FP stack regs
559 when doing an `x' command are all wrong.)
560 Note that these problems generally don't affect the native SVR4
561 C compiler because it doesn't allow the use of -O with -g and
562 because when it is *not* optimizing, it allocates a memory
563 location for each floating-point variable, and the memory
564 location is what gets described in the DWARF AT_location
565 attribute for the variable in question.
566 Regardless of the severe mental illness of the x86/svr4 SDB, we
567 do something sensible here and we use the following DWARF
568 register numbers. Note that these are all stack-top-relative
569 numbers.
570 11 for %st(0) (gcc regno = 8)
571 12 for %st(1) (gcc regno = 9)
572 13 for %st(2) (gcc regno = 10)
573 14 for %st(3) (gcc regno = 11)
574 15 for %st(4) (gcc regno = 12)
575 16 for %st(5) (gcc regno = 13)
576 17 for %st(6) (gcc regno = 14)
577 18 for %st(7) (gcc regno = 15)
578*/
0f290768 579int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
580{
581 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
582 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
3f3f2124 583 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
584 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
585 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
3f3f2124
JH
586 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
587 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
588};
589
c572e5ba
JVA
590/* Test and compare insns in i386.md store the information needed to
591 generate branch and scc insns here. */
592
07933f72
GS
593rtx ix86_compare_op0 = NULL_RTX;
594rtx ix86_compare_op1 = NULL_RTX;
f5316dfe 595
f996902d
RH
596/* The encoding characters for the four TLS models present in ELF. */
597
755ac5d4 598static char const tls_model_chars[] = " GLil";
f996902d 599
7a2e09f4 600#define MAX_386_STACK_LOCALS 3
8362f420
JH
601/* Size of the register save area. */
602#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
36edd3cc
BS
603
604/* Define the structure for the machine field in struct function. */
e2500fed 605struct machine_function GTY(())
36edd3cc
BS
606{
/* One row per machine mode, MAX_386_STACK_LOCALS entries per row;
   reached through the ix86_stack_locals macro.  */
607 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
/* NOTE(review): presumably the name cached on behalf of
   get_some_local_dynamic_name -- confirm against its definition.  */
f996902d 608 const char *some_ld_name;
/* Reached through the ix86_save_varrargs_registers macro.  */
8362f420 609 int save_varrargs_registers;
/* NOTE(review): meaning is not visible in this part of the file --
   confirm at the points where it is set and read.  */
6fca22eb 610 int accesses_prev_frame;
36edd3cc
BS
611};
612
01d939e8 613#define ix86_stack_locals (cfun->machine->stack_locals)
8362f420 614#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
36edd3cc 615
4dd2ac2c
JH
616/* Structure describing stack frame layout.
617 Stack grows downward:
618
619 [arguments]
620 <- ARG_POINTER
621 saved pc
622
623 saved frame pointer if frame_pointer_needed
624 <- HARD_FRAME_POINTER
625 [saved regs]
626
627 [padding1] \
628 )
629 [va_arg registers] (
630 > to_allocate <- FRAME_POINTER
631 [frame] (
632 )
633 [padding2] /
634 */
635struct ix86_frame
636{
/* NOTE(review): presumably the number of registers in the [saved regs]
   area of the layout diagram -- confirm against ix86_nsaved_regs.  */
637 int nregs;
/* [padding1] in the layout diagram.  */
638 int padding1;
/* NOTE(review): presumably the size of the [va_arg registers] area --
   confirm.  */
8362f420 639 int va_arg_size;
4dd2ac2c
JH
/* [frame] in the layout diagram.  */
640 HOST_WIDE_INT frame;
/* [padding2] in the layout diagram.  */
641 int padding2;
/* Not shown in the layout diagram.  NOTE(review): confirm how these two
   sizes enter the offsets below.  */
642 int outgoing_arguments_size;
8362f420 643 int red_zone_size;
4dd2ac2c
JH
644
/* The span the layout diagram brackets as to_allocate: padding1 through
   padding2.  */
645 HOST_WIDE_INT to_allocate;
646 /* The offsets relative to ARG_POINTER. */
647 HOST_WIDE_INT frame_pointer_offset;
648 HOST_WIDE_INT hard_frame_pointer_offset;
649 HOST_WIDE_INT stack_pointer_offset;
650};
651
c93e80a5
JH
652/* Used to enable/disable debugging features. */
653const char *ix86_debug_arg_string, *ix86_debug_addr_string;
6189a572
JH
654/* Code model option as passed by user. */
655const char *ix86_cmodel_string;
656/* Parsed value. */
657enum cmodel ix86_cmodel;
80f33d06
GS
658/* Asm dialect. */
659const char *ix86_asm_string;
660enum asm_dialect ix86_asm_dialect = ASM_ATT;
f996902d
RH
661/* TLS dialect. */
662const char *ix86_tls_dialect_string;
663enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
6189a572 664
5bf0ebab 665/* Which unit we are generating floating point math for. */
965f5423
JH
666enum fpmath_unit ix86_fpmath;
667
5bf0ebab
RH
668/* Which cpu are we scheduling for. */
669enum processor_type ix86_cpu;
670/* Which instruction set architecture to use. */
671enum processor_type ix86_arch;
c8c5cb99
SC
672
673/* Strings to hold which cpu and instruction set architecture to use. */
9c23aa47
ZW
674const char *ix86_cpu_string; /* for -mcpu=<xxx> */
675const char *ix86_arch_string; /* for -march=<xxx> */
965f5423 676const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
c8c5cb99 677
0f290768 678/* # of registers to use to pass arguments. */
e075ae69 679const char *ix86_regparm_string;
e9a25f70 680
f4365627
JH
681/* true if sse prefetch instruction is not NOOP. */
682int x86_prefetch_sse;
683
e075ae69
RH
684/* ix86_regparm_string as a number */
685int ix86_regparm;
e9a25f70
JL
686
687/* Alignment to use for loops and jumps: */
688
0f290768 689/* Power of two alignment for loops. */
e075ae69 690const char *ix86_align_loops_string;
e9a25f70 691
0f290768 692/* Power of two alignment for non-loop jumps. */
e075ae69 693const char *ix86_align_jumps_string;
e9a25f70 694
3af4bd89 695/* Power of two alignment for stack boundary in bytes. */
e075ae69 696const char *ix86_preferred_stack_boundary_string;
3af4bd89
JH
697
698/* Preferred alignment for stack boundary in bits. */
e075ae69 699int ix86_preferred_stack_boundary;
3af4bd89 700
e9a25f70 701/* Values 1-5: see jump.c */
e075ae69
RH
702int ix86_branch_cost;
703const char *ix86_branch_cost_string;
e9a25f70 704
0f290768 705/* Power of two alignment for functions. */
e075ae69 706const char *ix86_align_funcs_string;
623fe810
RH
707
708/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
709static char internal_label_prefix[16];
710static int internal_label_prefix_len;
e075ae69 711\f
623fe810 712static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
f996902d 713static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
f6da8bc3
KG
714static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
715static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
e075ae69 716 int, int, FILE *));
f996902d
RH
717static const char *get_some_local_dynamic_name PARAMS ((void));
718static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
719static rtx maybe_get_pool_constant PARAMS ((rtx));
f6da8bc3 720static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
3a3677ff
RH
721static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
722 rtx *, rtx *));
f996902d 723static rtx get_thread_pointer PARAMS ((void));
145aacc2 724static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
f6da8bc3
KG
725static rtx gen_push PARAMS ((rtx));
726static int memory_address_length PARAMS ((rtx addr));
727static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
728static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
f6da8bc3
KG
729static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
730static void ix86_dump_ppro_packet PARAMS ((FILE *));
731static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
e2500fed 732static struct machine_function * ix86_init_machine_status PARAMS ((void));
2b589241 733static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
b531087a
KH
734static int ix86_nsaved_regs PARAMS ((void));
735static void ix86_emit_save_regs PARAMS ((void));
c6036a37 736static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
37a58036 737static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
bd09bdeb 738static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
0e4970d7 739static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
b531087a 740static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
55efb413 741static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
0945b39d 742static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
0945b39d
JH
743static rtx ix86_expand_aligntest PARAMS ((rtx, int));
744static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
c237e94a
ZW
745static int ix86_issue_rate PARAMS ((void));
746static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
747static void ix86_sched_init PARAMS ((FILE *, int, int));
748static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
749static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
9b690711
RH
750static int ia32_use_dfa_pipeline_interface PARAMS ((void));
751static int ia32_multipass_dfa_lookahead PARAMS ((void));
e37af218 752static void ix86_init_mmx_sse_builtins PARAMS ((void));
483ab821 753static rtx ia32_this_parameter PARAMS ((tree));
e075ae69
RH
754
/* The parts of a memory address, as filled in by ix86_decompose_address
   (declared just below).  Presumably the address they describe is
   BASE + INDEX * SCALE + DISP -- confirm against ix86_decompose_address.  */
struct ix86_address
{
  rtx base, index, disp;	/* Address components, each an rtx.  */
  HOST_WIDE_INT scale;		/* Integer scale value.  */
};
b08de47e 760
e075ae69 761static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
bd793c65 762
f996902d
RH
763static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
764static const char *ix86_strip_name_encoding PARAMS ((const char *))
765 ATTRIBUTE_UNUSED;
fb49053f 766
bd793c65 767struct builtin_description;
8b60264b
KG
768static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
769 tree, rtx));
770static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
771 tree, rtx));
bd793c65
BS
772static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
773static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
774static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
e37af218 775static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
bd793c65 776static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
c0c102a9
JH
777static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
778static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
779 enum rtx_code *,
780 enum rtx_code *,
781 enum rtx_code *));
9e7adcb3
JH
782static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
783 rtx *, rtx *));
784static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
785static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
786static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
787static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
bd09bdeb 788static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
9b690711 789static int ix86_save_reg PARAMS ((unsigned int, int));
4dd2ac2c 790static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
8d8e52be 791static int ix86_comp_type_attributes PARAMS ((tree, tree));
483ab821 792static int ix86_fntype_regparm PARAMS ((tree));
91d231cb
JM
793const struct attribute_spec ix86_attribute_table[];
794static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
795static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
b069de3b 796static int ix86_value_regno PARAMS ((enum machine_mode));
7c262518 797
21c318ba 798#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
2cc07db4
RH
799static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
800#endif
e56feed6 801
53c17031
JH
802/* Register class used for passing given 64bit part of the argument.
803 These represent classes as documented by the PS ABI, with the exception
804 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
805 use SF or DFmode move instead of DImode to avoid reformatting penalties.
806
807 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
808 whenever possible (upper half does contain padding).
809 */
/* Argument classes used when classifying each 64bit part of an x86-64
   argument.  The enumerator order matters: x86_64_reg_class_name lists
   its strings in this same order.  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,	/* INTEGER variant moved in SImode.  */
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,		/* SSE variant moved in SFmode.  */
    X86_64_SSEDF_CLASS,		/* SSE variant moved in DFmode.  */
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
/* Printable names for the x86_64_reg_class values, listed in the same
   order as the enumerators.
   NOTE(review): the last string, at the position of X86_64_MEMORY_CLASS,
   is "no" -- the same text as X86_64_NO_CLASS.  Confirm whether "memory"
   was intended.  */
static const char * const x86_64_reg_class_name[] =
   {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
825
826#define MAX_CLASSES 4
827static int classify_argument PARAMS ((enum machine_mode, tree,
828 enum x86_64_reg_class [MAX_CLASSES],
829 int));
830static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
831 int *));
832static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
07933f72 833 const int *, int));
53c17031
JH
834static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
835 enum x86_64_reg_class));
672a6f42
NB
836\f
837/* Initialize the GCC target structure. */
91d231cb
JM
838#undef TARGET_ATTRIBUTE_TABLE
839#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
672a6f42 840#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
f5f4be42
NB
841# undef TARGET_MERGE_DECL_ATTRIBUTES
842# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
672a6f42
NB
843#endif
844
8d8e52be
JM
845#undef TARGET_COMP_TYPE_ATTRIBUTES
846#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
847
f6155fda
SS
848#undef TARGET_INIT_BUILTINS
849#define TARGET_INIT_BUILTINS ix86_init_builtins
850
851#undef TARGET_EXPAND_BUILTIN
852#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
853
bd09bdeb
RH
854#undef TARGET_ASM_FUNCTION_EPILOGUE
855#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
08c148a8 856
17b53c33
NB
857#undef TARGET_ASM_OPEN_PAREN
858#define TARGET_ASM_OPEN_PAREN ""
859#undef TARGET_ASM_CLOSE_PAREN
860#define TARGET_ASM_CLOSE_PAREN ""
861
301d03af
RS
862#undef TARGET_ASM_ALIGNED_HI_OP
863#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
864#undef TARGET_ASM_ALIGNED_SI_OP
865#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
866#ifdef ASM_QUAD
867#undef TARGET_ASM_ALIGNED_DI_OP
868#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
869#endif
870
871#undef TARGET_ASM_UNALIGNED_HI_OP
872#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
873#undef TARGET_ASM_UNALIGNED_SI_OP
874#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
875#undef TARGET_ASM_UNALIGNED_DI_OP
876#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
877
c237e94a
ZW
878#undef TARGET_SCHED_ADJUST_COST
879#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
880#undef TARGET_SCHED_ISSUE_RATE
881#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
882#undef TARGET_SCHED_VARIABLE_ISSUE
883#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
884#undef TARGET_SCHED_INIT
885#define TARGET_SCHED_INIT ix86_sched_init
886#undef TARGET_SCHED_REORDER
887#define TARGET_SCHED_REORDER ix86_sched_reorder
fce5a9f2 888#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
9b690711
RH
889#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
890 ia32_use_dfa_pipeline_interface
891#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
892#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
893 ia32_multipass_dfa_lookahead
c237e94a 894
f996902d
RH
895#ifdef HAVE_AS_TLS
896#undef TARGET_HAVE_TLS
897#define TARGET_HAVE_TLS true
898#endif
899
f6897b10 900struct gcc_target targetm = TARGET_INITIALIZER;
e075ae69 901\f
f5316dfe
MM
902/* Sometimes certain combinations of command options do not make
903 sense on a particular target machine. You can define a macro
904 `OVERRIDE_OPTIONS' to take account of this. This macro, if
905 defined, is executed once just after all the command options have
906 been parsed.
907
908 Don't use this macro to turn on various extra optimizations for
909 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
910
911void
912override_options ()
913{
400500c4 914 int i;
e075ae69
RH
915 /* Comes from final.c -- no real reason to change it. */
916#define MAX_CODE_ALIGN 16
f5316dfe 917
c8c5cb99
SC
918 static struct ptt
919 {
8b60264b
KG
920 const struct processor_costs *cost; /* Processor costs */
921 const int target_enable; /* Target flags to enable. */
922 const int target_disable; /* Target flags to disable. */
923 const int align_loop; /* Default alignments. */
2cca7283 924 const int align_loop_max_skip;
8b60264b 925 const int align_jump;
2cca7283 926 const int align_jump_max_skip;
8b60264b
KG
927 const int align_func;
928 const int branch_cost;
e075ae69 929 }
0f290768 930 const processor_target_table[PROCESSOR_max] =
e075ae69 931 {
2cca7283
JH
932 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
933 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
934 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
935 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
936 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
937 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
938 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
e075ae69
RH
939 };
940
f4365627 941 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
e075ae69
RH
942 static struct pta
943 {
8b60264b
KG
944 const char *const name; /* processor name or nickname. */
945 const enum processor_type processor;
0dd0e980
JH
946 const enum pta_flags
947 {
948 PTA_SSE = 1,
949 PTA_SSE2 = 2,
950 PTA_MMX = 4,
f4365627 951 PTA_PREFETCH_SSE = 8,
0dd0e980
JH
952 PTA_3DNOW = 16,
953 PTA_3DNOW_A = 64
954 } flags;
e075ae69 955 }
0f290768 956 const processor_alias_table[] =
e075ae69 957 {
0dd0e980
JH
958 {"i386", PROCESSOR_I386, 0},
959 {"i486", PROCESSOR_I486, 0},
960 {"i586", PROCESSOR_PENTIUM, 0},
961 {"pentium", PROCESSOR_PENTIUM, 0},
962 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
3780101d
JG
963 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
964 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
965 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
0dd0e980
JH
966 {"i686", PROCESSOR_PENTIUMPRO, 0},
967 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
968 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
f4365627 969 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
0dd0e980 970 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
f4365627 971 PTA_MMX | PTA_PREFETCH_SSE},
0dd0e980
JH
972 {"k6", PROCESSOR_K6, PTA_MMX},
973 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
974 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
f4365627 975 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 976 | PTA_3DNOW_A},
f4365627 977 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
0dd0e980 978 | PTA_3DNOW | PTA_3DNOW_A},
f4365627 979 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 980 | PTA_3DNOW_A | PTA_SSE},
f4365627 981 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 982 | PTA_3DNOW_A | PTA_SSE},
f4365627 983 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 984 | PTA_3DNOW_A | PTA_SSE},
3af4bd89 985 };
c8c5cb99 986
ca7558fc 987 int const pta_size = ARRAY_SIZE (processor_alias_table);
c8c5cb99 988
3dc85dfb
RH
989 /* By default our XFmode is the 80-bit extended format. If we have
990 use TFmode instead, it's also the 80-bit format, but with padding. */
991 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
992 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
993
f5316dfe
MM
994#ifdef SUBTARGET_OVERRIDE_OPTIONS
995 SUBTARGET_OVERRIDE_OPTIONS;
996#endif
997
f4365627
JH
998 if (!ix86_cpu_string && ix86_arch_string)
999 ix86_cpu_string = ix86_arch_string;
1000 if (!ix86_cpu_string)
1001 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
1002 if (!ix86_arch_string)
1003 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
e075ae69 1004
6189a572
JH
1005 if (ix86_cmodel_string != 0)
1006 {
1007 if (!strcmp (ix86_cmodel_string, "small"))
1008 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1009 else if (flag_pic)
c725bd79 1010 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
6189a572
JH
1011 else if (!strcmp (ix86_cmodel_string, "32"))
1012 ix86_cmodel = CM_32;
1013 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1014 ix86_cmodel = CM_KERNEL;
1015 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1016 ix86_cmodel = CM_MEDIUM;
1017 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1018 ix86_cmodel = CM_LARGE;
1019 else
1020 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1021 }
1022 else
1023 {
1024 ix86_cmodel = CM_32;
1025 if (TARGET_64BIT)
1026 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1027 }
c93e80a5
JH
1028 if (ix86_asm_string != 0)
1029 {
1030 if (!strcmp (ix86_asm_string, "intel"))
1031 ix86_asm_dialect = ASM_INTEL;
1032 else if (!strcmp (ix86_asm_string, "att"))
1033 ix86_asm_dialect = ASM_ATT;
1034 else
1035 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1036 }
6189a572 1037 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
c725bd79 1038 error ("code model `%s' not supported in the %s bit mode",
6189a572
JH
1039 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1040 if (ix86_cmodel == CM_LARGE)
c725bd79 1041 sorry ("code model `large' not supported yet");
0c2dc519 1042 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
c725bd79 1043 sorry ("%i-bit mode not compiled in",
0c2dc519 1044 (target_flags & MASK_64BIT) ? 64 : 32);
6189a572 1045
f4365627
JH
1046 for (i = 0; i < pta_size; i++)
1047 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1048 {
1049 ix86_arch = processor_alias_table[i].processor;
1050 /* Default cpu tuning to the architecture. */
1051 ix86_cpu = ix86_arch;
1052 if (processor_alias_table[i].flags & PTA_MMX
9ef1b13a 1053 && !(target_flags_explicit & MASK_MMX))
f4365627
JH
1054 target_flags |= MASK_MMX;
1055 if (processor_alias_table[i].flags & PTA_3DNOW
9ef1b13a 1056 && !(target_flags_explicit & MASK_3DNOW))
f4365627
JH
1057 target_flags |= MASK_3DNOW;
1058 if (processor_alias_table[i].flags & PTA_3DNOW_A
9ef1b13a 1059 && !(target_flags_explicit & MASK_3DNOW_A))
f4365627
JH
1060 target_flags |= MASK_3DNOW_A;
1061 if (processor_alias_table[i].flags & PTA_SSE
9ef1b13a 1062 && !(target_flags_explicit & MASK_SSE))
f4365627
JH
1063 target_flags |= MASK_SSE;
1064 if (processor_alias_table[i].flags & PTA_SSE2
9ef1b13a 1065 && !(target_flags_explicit & MASK_SSE2))
f4365627
JH
1066 target_flags |= MASK_SSE2;
1067 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1068 x86_prefetch_sse = true;
1069 break;
1070 }
400500c4 1071
f4365627
JH
1072 if (i == pta_size)
1073 error ("bad value (%s) for -march= switch", ix86_arch_string);
e075ae69 1074
f4365627
JH
1075 for (i = 0; i < pta_size; i++)
1076 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1077 {
1078 ix86_cpu = processor_alias_table[i].processor;
1079 break;
1080 }
1081 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1082 x86_prefetch_sse = true;
1083 if (i == pta_size)
1084 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
e075ae69 1085
2ab0437e
JH
1086 if (optimize_size)
1087 ix86_cost = &size_cost;
1088 else
1089 ix86_cost = processor_target_table[ix86_cpu].cost;
e075ae69
RH
1090 target_flags |= processor_target_table[ix86_cpu].target_enable;
1091 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1092
36edd3cc
BS
1093 /* Arrange to set up i386_stack_locals for all functions. */
1094 init_machine_status = ix86_init_machine_status;
fce5a9f2 1095
0f290768 1096 /* Validate -mregparm= value. */
e075ae69 1097 if (ix86_regparm_string)
b08de47e 1098 {
400500c4
RK
1099 i = atoi (ix86_regparm_string);
1100 if (i < 0 || i > REGPARM_MAX)
1101 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1102 else
1103 ix86_regparm = i;
b08de47e 1104 }
0d7d98ee
JH
1105 else
1106 if (TARGET_64BIT)
1107 ix86_regparm = REGPARM_MAX;
b08de47e 1108
3e18fdf6 1109 /* If the user has provided any of the -malign-* options,
a4f31c00 1110 warn and use that value only if -falign-* is not set.
3e18fdf6 1111 Remove this code in GCC 3.2 or later. */
e075ae69 1112 if (ix86_align_loops_string)
b08de47e 1113 {
3e18fdf6
GK
1114 warning ("-malign-loops is obsolete, use -falign-loops");
1115 if (align_loops == 0)
1116 {
1117 i = atoi (ix86_align_loops_string);
1118 if (i < 0 || i > MAX_CODE_ALIGN)
1119 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1120 else
1121 align_loops = 1 << i;
1122 }
b08de47e 1123 }
3af4bd89 1124
e075ae69 1125 if (ix86_align_jumps_string)
b08de47e 1126 {
3e18fdf6
GK
1127 warning ("-malign-jumps is obsolete, use -falign-jumps");
1128 if (align_jumps == 0)
1129 {
1130 i = atoi (ix86_align_jumps_string);
1131 if (i < 0 || i > MAX_CODE_ALIGN)
1132 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1133 else
1134 align_jumps = 1 << i;
1135 }
b08de47e 1136 }
b08de47e 1137
e075ae69 1138 if (ix86_align_funcs_string)
b08de47e 1139 {
3e18fdf6
GK
1140 warning ("-malign-functions is obsolete, use -falign-functions");
1141 if (align_functions == 0)
1142 {
1143 i = atoi (ix86_align_funcs_string);
1144 if (i < 0 || i > MAX_CODE_ALIGN)
1145 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1146 else
1147 align_functions = 1 << i;
1148 }
b08de47e 1149 }
3af4bd89 1150
3e18fdf6 1151 /* Default align_* from the processor table. */
3e18fdf6 1152 if (align_loops == 0)
2cca7283
JH
1153 {
1154 align_loops = processor_target_table[ix86_cpu].align_loop;
1155 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1156 }
3e18fdf6 1157 if (align_jumps == 0)
2cca7283
JH
1158 {
1159 align_jumps = processor_target_table[ix86_cpu].align_jump;
1160 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1161 }
3e18fdf6 1162 if (align_functions == 0)
2cca7283
JH
1163 {
1164 align_functions = processor_target_table[ix86_cpu].align_func;
1165 }
3e18fdf6 1166
e4c0478d 1167 /* Validate -mpreferred-stack-boundary= value, or provide default.
fbb83b43
AO
1168 The default of 128 bits is for Pentium III's SSE __m128, but we
1169 don't want additional code to keep the stack aligned when
1170 optimizing for code size. */
1171 ix86_preferred_stack_boundary = (optimize_size
ef49d42e 1172 ? TARGET_64BIT ? 128 : 32
fbb83b43 1173 : 128);
e075ae69 1174 if (ix86_preferred_stack_boundary_string)
3af4bd89 1175 {
400500c4 1176 i = atoi (ix86_preferred_stack_boundary_string);
ef49d42e 1177 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
c6257c5d 1178 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
ef49d42e 1179 TARGET_64BIT ? 4 : 2);
400500c4
RK
1180 else
1181 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3af4bd89 1182 }
77a989d1 1183
0f290768 1184 /* Validate -mbranch-cost= value, or provide default. */
e075ae69
RH
1185 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1186 if (ix86_branch_cost_string)
804a8ee0 1187 {
400500c4
RK
1188 i = atoi (ix86_branch_cost_string);
1189 if (i < 0 || i > 5)
1190 error ("-mbranch-cost=%d is not between 0 and 5", i);
1191 else
1192 ix86_branch_cost = i;
804a8ee0 1193 }
804a8ee0 1194
f996902d
RH
1195 if (ix86_tls_dialect_string)
1196 {
1197 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1198 ix86_tls_dialect = TLS_DIALECT_GNU;
1199 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1200 ix86_tls_dialect = TLS_DIALECT_SUN;
1201 else
1202 error ("bad value (%s) for -mtls-dialect= switch",
1203 ix86_tls_dialect_string);
1204 }
1205
db01f480
JH
1206 if (profile_flag)
1207 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1208
e9a25f70
JL
1209 /* Keep nonleaf frame pointers. */
1210 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 1211 flag_omit_frame_pointer = 1;
e075ae69
RH
1212
1213 /* If we're doing fast math, we don't care about comparison order
1214 wrt NaNs. This lets us use a shorter comparison sequence. */
de6c5979 1215 if (flag_unsafe_math_optimizations)
e075ae69
RH
1216 target_flags &= ~MASK_IEEE_FP;
1217
30c99a84
RH
1218 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1219 since the insns won't need emulation. */
1220 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1221 target_flags &= ~MASK_NO_FANCY_MATH_387;
1222
14f73b5a
JH
1223 if (TARGET_64BIT)
1224 {
1225 if (TARGET_ALIGN_DOUBLE)
c725bd79 1226 error ("-malign-double makes no sense in the 64bit mode");
14f73b5a 1227 if (TARGET_RTD)
c725bd79 1228 error ("-mrtd calling convention not supported in the 64bit mode");
14f73b5a 1229 /* Enable by default the SSE and MMX builtins. */
965f5423
JH
1230 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1231 ix86_fpmath = FPMATH_SSE;
14f73b5a 1232 }
965f5423
JH
1233 else
1234 ix86_fpmath = FPMATH_387;
1235
1236 if (ix86_fpmath_string != 0)
1237 {
1238 if (! strcmp (ix86_fpmath_string, "387"))
1239 ix86_fpmath = FPMATH_387;
1240 else if (! strcmp (ix86_fpmath_string, "sse"))
1241 {
1242 if (!TARGET_SSE)
1243 {
1244 warning ("SSE instruction set disabled, using 387 arithmetics");
1245 ix86_fpmath = FPMATH_387;
1246 }
1247 else
1248 ix86_fpmath = FPMATH_SSE;
1249 }
1250 else if (! strcmp (ix86_fpmath_string, "387,sse")
1251 || ! strcmp (ix86_fpmath_string, "sse,387"))
1252 {
1253 if (!TARGET_SSE)
1254 {
1255 warning ("SSE instruction set disabled, using 387 arithmetics");
1256 ix86_fpmath = FPMATH_387;
1257 }
1258 else if (!TARGET_80387)
1259 {
1260 warning ("387 instruction set disabled, using SSE arithmetics");
1261 ix86_fpmath = FPMATH_SSE;
1262 }
1263 else
1264 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1265 }
fce5a9f2 1266 else
965f5423
JH
1267 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1268 }
14f73b5a 1269
a7180f70
BS
1270 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1271 on by -msse. */
1272 if (TARGET_SSE)
e37af218
RH
1273 {
1274 target_flags |= MASK_MMX;
1275 x86_prefetch_sse = true;
1276 }
c6036a37 1277
47f339cf
BS
1278 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1279 if (TARGET_3DNOW)
1280 {
1281 target_flags |= MASK_MMX;
1282 /* If we are targetting the Athlon architecture, enable the 3Dnow/MMX
1283 extensions it adds. */
1284 if (x86_3dnow_a & (1 << ix86_arch))
1285 target_flags |= MASK_3DNOW_A;
1286 }
c6036a37 1287 if ((x86_accumulate_outgoing_args & CPUMASK)
9ef1b13a 1288 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
c6036a37
JH
1289 && !optimize_size)
1290 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
623fe810
RH
1291
1292 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1293 {
1294 char *p;
1295 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1296 p = strchr (internal_label_prefix, 'X');
1297 internal_label_prefix_len = p - internal_label_prefix;
1298 *p = '\0';
1299 }
483ab821
MM
1300
1301 /* In 64-bit mode, we do not have support for vcall thunks. */
1302 if (TARGET_64BIT)
1303 targetm.asm_out.output_mi_vcall_thunk = NULL;
f5316dfe
MM
1304}
1305\f
32b5b1aa 1306void
c6aded7c 1307optimization_options (level, size)
32b5b1aa 1308 int level;
bb5177ac 1309 int size ATTRIBUTE_UNUSED;
32b5b1aa 1310{
e9a25f70
JL
1311 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1312 make the problem with not enough registers even worse. */
32b5b1aa
SC
1313#ifdef INSN_SCHEDULING
1314 if (level > 1)
1315 flag_schedule_insns = 0;
1316#endif
53c17031
JH
1317 if (TARGET_64BIT && optimize >= 1)
1318 flag_omit_frame_pointer = 1;
1319 if (TARGET_64BIT)
b932f770
JH
1320 {
1321 flag_pcc_struct_return = 0;
1322 flag_asynchronous_unwind_tables = 1;
1323 }
db01f480
JH
1324 if (profile_flag)
1325 flag_omit_frame_pointer = 0;
32b5b1aa 1326}
b08de47e 1327\f
91d231cb
JM
/* Table of valid machine attributes.  It is installed as
   TARGET_ATTRIBUTE_TABLE earlier in this file.  The final all-NULL/zero
   entry is presumably the end-of-table marker -- confirm against the
   attribute_spec consumer.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  /* NOTE(review): the two handlers below are not declared in the visible
     part of this file; presumably a target header provides them when
     TARGET_DLLIMPORT_DECL_ATTRIBUTES is defined -- confirm.  */
  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  { NULL,        0, 0, false, false, false, NULL }
};
1347
1348/* Handle a "cdecl" or "stdcall" attribute;
1349 arguments as in struct attribute_spec.handler. */
1350static tree
1351ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1352 tree *node;
1353 tree name;
1354 tree args ATTRIBUTE_UNUSED;
1355 int flags ATTRIBUTE_UNUSED;
1356 bool *no_add_attrs;
1357{
1358 if (TREE_CODE (*node) != FUNCTION_TYPE
1359 && TREE_CODE (*node) != METHOD_TYPE
1360 && TREE_CODE (*node) != FIELD_DECL
1361 && TREE_CODE (*node) != TYPE_DECL)
b08de47e 1362 {
91d231cb
JM
1363 warning ("`%s' attribute only applies to functions",
1364 IDENTIFIER_POINTER (name));
1365 *no_add_attrs = true;
1366 }
b08de47e 1367
91d231cb
JM
1368 if (TARGET_64BIT)
1369 {
1370 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1371 *no_add_attrs = true;
1372 }
b08de47e 1373
91d231cb
JM
1374 return NULL_TREE;
1375}
b08de47e 1376
91d231cb
JM
1377/* Handle a "regparm" attribute;
1378 arguments as in struct attribute_spec.handler. */
1379static tree
1380ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1381 tree *node;
1382 tree name;
1383 tree args;
1384 int flags ATTRIBUTE_UNUSED;
1385 bool *no_add_attrs;
1386{
1387 if (TREE_CODE (*node) != FUNCTION_TYPE
1388 && TREE_CODE (*node) != METHOD_TYPE
1389 && TREE_CODE (*node) != FIELD_DECL
1390 && TREE_CODE (*node) != TYPE_DECL)
1391 {
1392 warning ("`%s' attribute only applies to functions",
1393 IDENTIFIER_POINTER (name));
1394 *no_add_attrs = true;
1395 }
1396 else
1397 {
1398 tree cst;
b08de47e 1399
91d231cb
JM
1400 cst = TREE_VALUE (args);
1401 if (TREE_CODE (cst) != INTEGER_CST)
1402 {
1403 warning ("`%s' attribute requires an integer constant argument",
1404 IDENTIFIER_POINTER (name));
1405 *no_add_attrs = true;
1406 }
1407 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1408 {
1409 warning ("argument to `%s' attribute larger than %d",
1410 IDENTIFIER_POINTER (name), REGPARM_MAX);
1411 *no_add_attrs = true;
1412 }
b08de47e
MM
1413 }
1414
91d231cb 1415 return NULL_TREE;
b08de47e
MM
1416}
1417
1418/* Return 0 if the attributes for two types are incompatible, 1 if they
1419 are compatible, and 2 if they are nearly compatible (which causes a
1420 warning to be generated). */
1421
8d8e52be 1422static int
e075ae69 1423ix86_comp_type_attributes (type1, type2)
afcfe58c
MM
1424 tree type1;
1425 tree type2;
b08de47e 1426{
0f290768 1427 /* Check for mismatch of non-default calling convention. */
27c38fbe 1428 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
1429
1430 if (TREE_CODE (type1) != FUNCTION_TYPE)
1431 return 1;
1432
1433 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
1434 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1435 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
afcfe58c 1436 return 0;
b08de47e
MM
1437 return 1;
1438}
b08de47e 1439\f
483ab821
MM
1440/* Return the regparm value for a fuctio with the indicated TYPE. */
1441
1442static int
1443ix86_fntype_regparm (type)
1444 tree type;
1445{
1446 tree attr;
1447
1448 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1449 if (attr)
1450 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1451 else
1452 return ix86_regparm;
1453}
1454
b08de47e
MM
1455/* Value is the number of bytes of arguments automatically
1456 popped when returning from a subroutine call.
1457 FUNDECL is the declaration node of the function (as a tree),
1458 FUNTYPE is the data type of the function (as a tree),
1459 or for a library call it is an identifier node for the subroutine name.
1460 SIZE is the number of bytes of arguments passed on the stack.
1461
1462 On the 80386, the RTD insn may be used to pop them if the number
1463 of args is fixed, but if the number is variable then the caller
1464 must pop them all. RTD can't be used for library calls now
1465 because the library is compiled with the Unix compiler.
1466 Use of RTD is a selectable option, since it is incompatible with
1467 standard Unix calling sequences. If the option is not selected,
1468 the caller must always pop the args.
1469
1470 The attribute stdcall is equivalent to RTD on a per module basis. */
1471
1472int
e075ae69 1473ix86_return_pops_args (fundecl, funtype, size)
b08de47e
MM
1474 tree fundecl;
1475 tree funtype;
1476 int size;
79325812 1477{
3345ee7d 1478 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 1479
0f290768 1480 /* Cdecl functions override -mrtd, and never pop the stack. */
e9a25f70 1481 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 1482
0f290768 1483 /* Stdcall functions will pop the stack if not variable args. */
698cdd84
SC
1484 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1485 rtd = 1;
79325812 1486
698cdd84
SC
1487 if (rtd
1488 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
e9a25f70
JL
1489 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1490 == void_type_node)))
698cdd84
SC
1491 return size;
1492 }
79325812 1493
232b8f52 1494 /* Lose any fake structure return argument if it is passed on the stack. */
0d7d98ee
JH
1495 if (aggregate_value_p (TREE_TYPE (funtype))
1496 && !TARGET_64BIT)
232b8f52 1497 {
483ab821 1498 int nregs = ix86_fntype_regparm (funtype);
232b8f52
JJ
1499
1500 if (!nregs)
1501 return GET_MODE_SIZE (Pmode);
1502 }
1503
1504 return 0;
b08de47e 1505}
b08de47e
MM
1506\f
1507/* Argument support functions. */
1508
53c17031
JH
1509/* Return true when register may be used to pass function parameters. */
1510bool
1511ix86_function_arg_regno_p (regno)
1512 int regno;
1513{
1514 int i;
1515 if (!TARGET_64BIT)
0333394e
JJ
1516 return (regno < REGPARM_MAX
1517 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
53c17031
JH
1518 if (SSE_REGNO_P (regno) && TARGET_SSE)
1519 return true;
1520 /* RAX is used as hidden argument to va_arg functions. */
1521 if (!regno)
1522 return true;
1523 for (i = 0; i < REGPARM_MAX; i++)
1524 if (regno == x86_64_int_parameter_registers[i])
1525 return true;
1526 return false;
1527}
1528
b08de47e
MM
1529/* Initialize a variable CUM of type CUMULATIVE_ARGS
1530 for a call to a function whose data type is FNTYPE.
1531 For a library call, FNTYPE is 0. */
1532
1533void
1534init_cumulative_args (cum, fntype, libname)
e9a25f70 1535 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
b08de47e
MM
1536 tree fntype; /* tree ptr for function decl */
1537 rtx libname; /* SYMBOL_REF of library name or 0 */
1538{
1539 static CUMULATIVE_ARGS zero_cum;
1540 tree param, next_param;
1541
1542 if (TARGET_DEBUG_ARG)
1543 {
1544 fprintf (stderr, "\ninit_cumulative_args (");
1545 if (fntype)
e9a25f70
JL
1546 fprintf (stderr, "fntype code = %s, ret code = %s",
1547 tree_code_name[(int) TREE_CODE (fntype)],
1548 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
b08de47e
MM
1549 else
1550 fprintf (stderr, "no fntype");
1551
1552 if (libname)
1553 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1554 }
1555
1556 *cum = zero_cum;
1557
1558 /* Set up the number of registers to use for passing arguments. */
e075ae69 1559 cum->nregs = ix86_regparm;
53c17031
JH
1560 cum->sse_nregs = SSE_REGPARM_MAX;
1561 if (fntype && !TARGET_64BIT)
b08de47e
MM
1562 {
1563 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
e9a25f70 1564
b08de47e
MM
1565 if (attr)
1566 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1567 }
53c17031 1568 cum->maybe_vaarg = false;
b08de47e
MM
1569
1570 /* Determine if this function has variable arguments. This is
1571 indicated by the last argument being 'void_type_mode' if there
1572 are no variable arguments. If there are variable arguments, then
1573 we won't pass anything in registers */
1574
1575 if (cum->nregs)
1576 {
1577 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 1578 param != 0; param = next_param)
b08de47e
MM
1579 {
1580 next_param = TREE_CHAIN (param);
e9a25f70 1581 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
53c17031
JH
1582 {
1583 if (!TARGET_64BIT)
1584 cum->nregs = 0;
1585 cum->maybe_vaarg = true;
1586 }
b08de47e
MM
1587 }
1588 }
53c17031
JH
1589 if ((!fntype && !libname)
1590 || (fntype && !TYPE_ARG_TYPES (fntype)))
1591 cum->maybe_vaarg = 1;
b08de47e
MM
1592
1593 if (TARGET_DEBUG_ARG)
1594 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1595
1596 return;
1597}
1598
53c17031 1599/* x86-64 register passing implementation. See x86-64 ABI for details. Goal
f710504c 1600 of this code is to classify each 8bytes of incoming argument by the register
53c17031
JH
1601 class and assign registers accordingly. */
1602
1603/* Return the union class of CLASS1 and CLASS2.
1604 See the x86-64 PS ABI for details. */
1605
1606static enum x86_64_reg_class
1607merge_classes (class1, class2)
1608 enum x86_64_reg_class class1, class2;
1609{
1610 /* Rule #1: If both classes are equal, this is the resulting class. */
1611 if (class1 == class2)
1612 return class1;
1613
1614 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1615 the other class. */
1616 if (class1 == X86_64_NO_CLASS)
1617 return class2;
1618 if (class2 == X86_64_NO_CLASS)
1619 return class1;
1620
1621 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1622 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1623 return X86_64_MEMORY_CLASS;
1624
1625 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1626 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1627 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1628 return X86_64_INTEGERSI_CLASS;
1629 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1630 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1631 return X86_64_INTEGER_CLASS;
1632
1633 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1634 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1635 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1636 return X86_64_MEMORY_CLASS;
1637
1638 /* Rule #6: Otherwise class SSE is used. */
1639 return X86_64_SSE_CLASS;
1640}
1641
1642/* Classify the argument of type TYPE and mode MODE.
1643 CLASSES will be filled by the register class used to pass each word
1644 of the operand. The number of words is returned. In case the parameter
1645 should be passed in memory, 0 is returned. As a special case for zero
1646 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1647
1648 BIT_OFFSET is used internally for handling records and specifies offset
1649 of the offset in bits modulo 256 to avoid overflow cases.
1650
1651 See the x86-64 PS ABI for details.
1652*/
1653
1654static int
1655classify_argument (mode, type, classes, bit_offset)
1656 enum machine_mode mode;
1657 tree type;
1658 enum x86_64_reg_class classes[MAX_CLASSES];
1659 int bit_offset;
1660{
1661 int bytes =
1662 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
23327dae 1663 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
53c17031 1664
c60ee6f5
JH
1665 /* Variable sized entities are always passed/returned in memory. */
1666 if (bytes < 0)
1667 return 0;
1668
53c17031
JH
1669 if (type && AGGREGATE_TYPE_P (type))
1670 {
1671 int i;
1672 tree field;
1673 enum x86_64_reg_class subclasses[MAX_CLASSES];
1674
1675 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1676 if (bytes > 16)
1677 return 0;
1678
1679 for (i = 0; i < words; i++)
1680 classes[i] = X86_64_NO_CLASS;
1681
1682 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1683 signalize memory class, so handle it as special case. */
1684 if (!words)
1685 {
1686 classes[0] = X86_64_NO_CLASS;
1687 return 1;
1688 }
1689
1690 /* Classify each field of record and merge classes. */
1691 if (TREE_CODE (type) == RECORD_TYPE)
1692 {
91ea38f9
JH
1693 /* For classes first merge in the field of the subclasses. */
1694 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1695 {
1696 tree bases = TYPE_BINFO_BASETYPES (type);
1697 int n_bases = TREE_VEC_LENGTH (bases);
1698 int i;
1699
1700 for (i = 0; i < n_bases; ++i)
1701 {
1702 tree binfo = TREE_VEC_ELT (bases, i);
1703 int num;
1704 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1705 tree type = BINFO_TYPE (binfo);
1706
1707 num = classify_argument (TYPE_MODE (type),
1708 type, subclasses,
1709 (offset + bit_offset) % 256);
1710 if (!num)
1711 return 0;
1712 for (i = 0; i < num; i++)
1713 {
db01f480 1714 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
1715 classes[i + pos] =
1716 merge_classes (subclasses[i], classes[i + pos]);
1717 }
1718 }
1719 }
1720 /* And now merge the fields of structure. */
53c17031
JH
1721 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1722 {
1723 if (TREE_CODE (field) == FIELD_DECL)
1724 {
1725 int num;
1726
1727 /* Bitfields are always classified as integer. Handle them
1728 early, since later code would consider them to be
1729 misaligned integers. */
1730 if (DECL_BIT_FIELD (field))
1731 {
1732 for (i = int_bit_position (field) / 8 / 8;
1733 i < (int_bit_position (field)
1734 + tree_low_cst (DECL_SIZE (field), 0)
1735 + 63) / 8 / 8; i++)
1736 classes[i] =
1737 merge_classes (X86_64_INTEGER_CLASS,
1738 classes[i]);
1739 }
1740 else
1741 {
1742 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1743 TREE_TYPE (field), subclasses,
1744 (int_bit_position (field)
1745 + bit_offset) % 256);
1746 if (!num)
1747 return 0;
1748 for (i = 0; i < num; i++)
1749 {
1750 int pos =
db01f480 1751 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
53c17031
JH
1752 classes[i + pos] =
1753 merge_classes (subclasses[i], classes[i + pos]);
1754 }
1755 }
1756 }
1757 }
1758 }
1759 /* Arrays are handled as small records. */
1760 else if (TREE_CODE (type) == ARRAY_TYPE)
1761 {
1762 int num;
1763 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1764 TREE_TYPE (type), subclasses, bit_offset);
1765 if (!num)
1766 return 0;
1767
1768 /* The partial classes are now full classes. */
1769 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1770 subclasses[0] = X86_64_SSE_CLASS;
1771 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1772 subclasses[0] = X86_64_INTEGER_CLASS;
1773
1774 for (i = 0; i < words; i++)
1775 classes[i] = subclasses[i % num];
1776 }
1777 /* Unions are similar to RECORD_TYPE but offset is always 0. */
e4dbaed5
AS
1778 else if (TREE_CODE (type) == UNION_TYPE
1779 || TREE_CODE (type) == QUAL_UNION_TYPE)
53c17031 1780 {
91ea38f9
JH
1781 /* For classes first merge in the field of the subclasses. */
1782 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1783 {
1784 tree bases = TYPE_BINFO_BASETYPES (type);
1785 int n_bases = TREE_VEC_LENGTH (bases);
1786 int i;
1787
1788 for (i = 0; i < n_bases; ++i)
1789 {
1790 tree binfo = TREE_VEC_ELT (bases, i);
1791 int num;
1792 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1793 tree type = BINFO_TYPE (binfo);
1794
1795 num = classify_argument (TYPE_MODE (type),
1796 type, subclasses,
db01f480 1797 (offset + (bit_offset % 64)) % 256);
91ea38f9
JH
1798 if (!num)
1799 return 0;
1800 for (i = 0; i < num; i++)
1801 {
c16576e6 1802 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
1803 classes[i + pos] =
1804 merge_classes (subclasses[i], classes[i + pos]);
1805 }
1806 }
1807 }
53c17031
JH
1808 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1809 {
1810 if (TREE_CODE (field) == FIELD_DECL)
1811 {
1812 int num;
1813 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1814 TREE_TYPE (field), subclasses,
1815 bit_offset);
1816 if (!num)
1817 return 0;
1818 for (i = 0; i < num; i++)
1819 classes[i] = merge_classes (subclasses[i], classes[i]);
1820 }
1821 }
1822 }
1823 else
1824 abort ();
1825
1826 /* Final merger cleanup. */
1827 for (i = 0; i < words; i++)
1828 {
1829 /* If one class is MEMORY, everything should be passed in
1830 memory. */
1831 if (classes[i] == X86_64_MEMORY_CLASS)
1832 return 0;
1833
d6a7951f 1834 /* The X86_64_SSEUP_CLASS should be always preceded by
53c17031
JH
1835 X86_64_SSE_CLASS. */
1836 if (classes[i] == X86_64_SSEUP_CLASS
1837 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1838 classes[i] = X86_64_SSE_CLASS;
1839
d6a7951f 1840 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
53c17031
JH
1841 if (classes[i] == X86_64_X87UP_CLASS
1842 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1843 classes[i] = X86_64_SSE_CLASS;
1844 }
1845 return words;
1846 }
1847
1848 /* Compute alignment needed. We align all types to natural boundaries with
1849 exception of XFmode that is aligned to 64bits. */
1850 if (mode != VOIDmode && mode != BLKmode)
1851 {
1852 int mode_alignment = GET_MODE_BITSIZE (mode);
1853
1854 if (mode == XFmode)
1855 mode_alignment = 128;
1856 else if (mode == XCmode)
1857 mode_alignment = 256;
f5143c46 1858 /* Misaligned fields are always returned in memory. */
53c17031
JH
1859 if (bit_offset % mode_alignment)
1860 return 0;
1861 }
1862
1863 /* Classification of atomic types. */
1864 switch (mode)
1865 {
1866 case DImode:
1867 case SImode:
1868 case HImode:
1869 case QImode:
1870 case CSImode:
1871 case CHImode:
1872 case CQImode:
1873 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1874 classes[0] = X86_64_INTEGERSI_CLASS;
1875 else
1876 classes[0] = X86_64_INTEGER_CLASS;
1877 return 1;
1878 case CDImode:
1879 case TImode:
1880 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1881 return 2;
1882 case CTImode:
1883 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1884 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1885 return 4;
1886 case SFmode:
1887 if (!(bit_offset % 64))
1888 classes[0] = X86_64_SSESF_CLASS;
1889 else
1890 classes[0] = X86_64_SSE_CLASS;
1891 return 1;
1892 case DFmode:
1893 classes[0] = X86_64_SSEDF_CLASS;
1894 return 1;
1895 case TFmode:
1896 classes[0] = X86_64_X87_CLASS;
1897 classes[1] = X86_64_X87UP_CLASS;
1898 return 2;
1899 case TCmode:
1900 classes[0] = X86_64_X87_CLASS;
1901 classes[1] = X86_64_X87UP_CLASS;
1902 classes[2] = X86_64_X87_CLASS;
1903 classes[3] = X86_64_X87UP_CLASS;
1904 return 4;
1905 case DCmode:
1906 classes[0] = X86_64_SSEDF_CLASS;
1907 classes[1] = X86_64_SSEDF_CLASS;
1908 return 2;
1909 case SCmode:
1910 classes[0] = X86_64_SSE_CLASS;
1911 return 1;
e95d6b23
JH
1912 case V4SFmode:
1913 case V4SImode:
495333a6
JH
1914 case V16QImode:
1915 case V8HImode:
1916 case V2DFmode:
1917 case V2DImode:
e95d6b23
JH
1918 classes[0] = X86_64_SSE_CLASS;
1919 classes[1] = X86_64_SSEUP_CLASS;
1920 return 2;
1921 case V2SFmode:
1922 case V2SImode:
1923 case V4HImode:
1924 case V8QImode:
1194ca05 1925 return 0;
53c17031 1926 case BLKmode:
e95d6b23 1927 case VOIDmode:
53c17031
JH
1928 return 0;
1929 default:
1930 abort ();
1931 }
1932}
1933
1934/* Examine the argument and return set number of register required in each
f5143c46 1935 class. Return 0 iff parameter should be passed in memory. */
53c17031
JH
1936static int
1937examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1938 enum machine_mode mode;
1939 tree type;
1940 int *int_nregs, *sse_nregs;
1941 int in_return;
1942{
1943 enum x86_64_reg_class class[MAX_CLASSES];
1944 int n = classify_argument (mode, type, class, 0);
1945
1946 *int_nregs = 0;
1947 *sse_nregs = 0;
1948 if (!n)
1949 return 0;
1950 for (n--; n >= 0; n--)
1951 switch (class[n])
1952 {
1953 case X86_64_INTEGER_CLASS:
1954 case X86_64_INTEGERSI_CLASS:
1955 (*int_nregs)++;
1956 break;
1957 case X86_64_SSE_CLASS:
1958 case X86_64_SSESF_CLASS:
1959 case X86_64_SSEDF_CLASS:
1960 (*sse_nregs)++;
1961 break;
1962 case X86_64_NO_CLASS:
1963 case X86_64_SSEUP_CLASS:
1964 break;
1965 case X86_64_X87_CLASS:
1966 case X86_64_X87UP_CLASS:
1967 if (!in_return)
1968 return 0;
1969 break;
1970 case X86_64_MEMORY_CLASS:
1971 abort ();
1972 }
1973 return 1;
1974}
1975/* Construct container for the argument used by GCC interface. See
1976 FUNCTION_ARG for the detailed description. */
1977static rtx
1978construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1979 enum machine_mode mode;
1980 tree type;
1981 int in_return;
1982 int nintregs, nsseregs;
07933f72
GS
1983 const int * intreg;
1984 int sse_regno;
53c17031
JH
1985{
1986 enum machine_mode tmpmode;
1987 int bytes =
1988 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1989 enum x86_64_reg_class class[MAX_CLASSES];
1990 int n;
1991 int i;
1992 int nexps = 0;
1993 int needed_sseregs, needed_intregs;
1994 rtx exp[MAX_CLASSES];
1995 rtx ret;
1996
1997 n = classify_argument (mode, type, class, 0);
1998 if (TARGET_DEBUG_ARG)
1999 {
2000 if (!n)
2001 fprintf (stderr, "Memory class\n");
2002 else
2003 {
2004 fprintf (stderr, "Classes:");
2005 for (i = 0; i < n; i++)
2006 {
2007 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2008 }
2009 fprintf (stderr, "\n");
2010 }
2011 }
2012 if (!n)
2013 return NULL;
2014 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2015 return NULL;
2016 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2017 return NULL;
2018
2019 /* First construct simple cases. Avoid SCmode, since we want to use
2020 single register to pass this type. */
2021 if (n == 1 && mode != SCmode)
2022 switch (class[0])
2023 {
2024 case X86_64_INTEGER_CLASS:
2025 case X86_64_INTEGERSI_CLASS:
2026 return gen_rtx_REG (mode, intreg[0]);
2027 case X86_64_SSE_CLASS:
2028 case X86_64_SSESF_CLASS:
2029 case X86_64_SSEDF_CLASS:
2030 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2031 case X86_64_X87_CLASS:
2032 return gen_rtx_REG (mode, FIRST_STACK_REG);
2033 case X86_64_NO_CLASS:
2034 /* Zero sized array, struct or class. */
2035 return NULL;
2036 default:
2037 abort ();
2038 }
2039 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
e95d6b23 2040 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
53c17031
JH
2041 if (n == 2
2042 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2043 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2044 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2045 && class[1] == X86_64_INTEGER_CLASS
2046 && (mode == CDImode || mode == TImode)
2047 && intreg[0] + 1 == intreg[1])
2048 return gen_rtx_REG (mode, intreg[0]);
2049 if (n == 4
2050 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2051 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2052 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2053
2054 /* Otherwise figure out the entries of the PARALLEL. */
2055 for (i = 0; i < n; i++)
2056 {
2057 switch (class[i])
2058 {
2059 case X86_64_NO_CLASS:
2060 break;
2061 case X86_64_INTEGER_CLASS:
2062 case X86_64_INTEGERSI_CLASS:
2063 /* Merge TImodes on aligned occassions here too. */
2064 if (i * 8 + 8 > bytes)
2065 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2066 else if (class[i] == X86_64_INTEGERSI_CLASS)
2067 tmpmode = SImode;
2068 else
2069 tmpmode = DImode;
2070 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2071 if (tmpmode == BLKmode)
2072 tmpmode = DImode;
2073 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2074 gen_rtx_REG (tmpmode, *intreg),
2075 GEN_INT (i*8));
2076 intreg++;
2077 break;
2078 case X86_64_SSESF_CLASS:
2079 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2080 gen_rtx_REG (SFmode,
2081 SSE_REGNO (sse_regno)),
2082 GEN_INT (i*8));
2083 sse_regno++;
2084 break;
2085 case X86_64_SSEDF_CLASS:
2086 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2087 gen_rtx_REG (DFmode,
2088 SSE_REGNO (sse_regno)),
2089 GEN_INT (i*8));
2090 sse_regno++;
2091 break;
2092 case X86_64_SSE_CLASS:
2093 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
2094 tmpmode = TImode, i++;
2095 else
2096 tmpmode = DImode;
2097 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2098 gen_rtx_REG (tmpmode,
2099 SSE_REGNO (sse_regno)),
2100 GEN_INT (i*8));
2101 sse_regno++;
2102 break;
2103 default:
2104 abort ();
2105 }
2106 }
2107 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2108 for (i = 0; i < nexps; i++)
2109 XVECEXP (ret, 0, i) = exp [i];
2110 return ret;
2111}
2112
b08de47e
MM
2113/* Update the data in CUM to advance over an argument
2114 of mode MODE and data type TYPE.
2115 (TYPE is null for libcalls where that information may not be available.) */
2116
2117void
2118function_arg_advance (cum, mode, type, named)
2119 CUMULATIVE_ARGS *cum; /* current arg information */
2120 enum machine_mode mode; /* current arg mode */
2121 tree type; /* type of the argument or 0 if lib support */
2122 int named; /* whether or not the argument was named */
2123{
5ac9118e
KG
2124 int bytes =
2125 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2126 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2127
2128 if (TARGET_DEBUG_ARG)
2129 fprintf (stderr,
e9a25f70 2130 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
b08de47e 2131 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
53c17031 2132 if (TARGET_64BIT)
b08de47e 2133 {
53c17031
JH
2134 int int_nregs, sse_nregs;
2135 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2136 cum->words += words;
2137 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
82a127a9 2138 {
53c17031
JH
2139 cum->nregs -= int_nregs;
2140 cum->sse_nregs -= sse_nregs;
2141 cum->regno += int_nregs;
2142 cum->sse_regno += sse_nregs;
82a127a9 2143 }
53c17031
JH
2144 else
2145 cum->words += words;
b08de47e 2146 }
a4f31c00 2147 else
82a127a9 2148 {
53c17031
JH
2149 if (TARGET_SSE && mode == TImode)
2150 {
2151 cum->sse_words += words;
2152 cum->sse_nregs -= 1;
2153 cum->sse_regno += 1;
2154 if (cum->sse_nregs <= 0)
2155 {
2156 cum->sse_nregs = 0;
2157 cum->sse_regno = 0;
2158 }
2159 }
2160 else
82a127a9 2161 {
53c17031
JH
2162 cum->words += words;
2163 cum->nregs -= words;
2164 cum->regno += words;
2165
2166 if (cum->nregs <= 0)
2167 {
2168 cum->nregs = 0;
2169 cum->regno = 0;
2170 }
82a127a9
CM
2171 }
2172 }
b08de47e
MM
2173 return;
2174}
2175
2176/* Define where to put the arguments to a function.
2177 Value is zero to push the argument on the stack,
2178 or a hard register in which to store the argument.
2179
2180 MODE is the argument's machine mode.
2181 TYPE is the data type of the argument (as a tree).
2182 This is null for libcalls where that information may
2183 not be available.
2184 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2185 the preceding args and about the function being called.
2186 NAMED is nonzero if this argument is a named parameter
2187 (otherwise it is an extra parameter matching an ellipsis). */
2188
07933f72 2189rtx
b08de47e
MM
2190function_arg (cum, mode, type, named)
2191 CUMULATIVE_ARGS *cum; /* current arg information */
2192 enum machine_mode mode; /* current arg mode */
2193 tree type; /* type of the argument or 0 if lib support */
2194 int named; /* != 0 for normal args, == 0 for ... args */
2195{
2196 rtx ret = NULL_RTX;
5ac9118e
KG
2197 int bytes =
2198 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2199 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2200
53c17031
JH
2201 /* Handle an hidden AL argument containing number of registers for varargs
2202 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2203 any AL settings. */
32ee7d1d 2204 if (mode == VOIDmode)
b08de47e 2205 {
53c17031
JH
2206 if (TARGET_64BIT)
2207 return GEN_INT (cum->maybe_vaarg
2208 ? (cum->sse_nregs < 0
2209 ? SSE_REGPARM_MAX
2210 : cum->sse_regno)
2211 : -1);
2212 else
2213 return constm1_rtx;
b08de47e 2214 }
53c17031
JH
2215 if (TARGET_64BIT)
2216 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2217 &x86_64_int_parameter_registers [cum->regno],
2218 cum->sse_regno);
2219 else
2220 switch (mode)
2221 {
2222 /* For now, pass fp/complex values on the stack. */
2223 default:
2224 break;
2225
2226 case BLKmode:
2227 case DImode:
2228 case SImode:
2229 case HImode:
2230 case QImode:
2231 if (words <= cum->nregs)
2232 ret = gen_rtx_REG (mode, cum->regno);
2233 break;
2234 case TImode:
2235 if (cum->sse_nregs)
2236 ret = gen_rtx_REG (mode, cum->sse_regno);
2237 break;
2238 }
b08de47e
MM
2239
2240 if (TARGET_DEBUG_ARG)
2241 {
2242 fprintf (stderr,
91ea38f9 2243 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
b08de47e
MM
2244 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2245
2246 if (ret)
91ea38f9 2247 print_simple_rtl (stderr, ret);
b08de47e
MM
2248 else
2249 fprintf (stderr, ", stack");
2250
2251 fprintf (stderr, " )\n");
2252 }
2253
2254 return ret;
2255}
53c17031
JH
2256
2257/* Gives the alignment boundary, in bits, of an argument with the specified mode
2258 and type. */
2259
2260int
2261ix86_function_arg_boundary (mode, type)
2262 enum machine_mode mode;
2263 tree type;
2264{
2265 int align;
2266 if (!TARGET_64BIT)
2267 return PARM_BOUNDARY;
2268 if (type)
2269 align = TYPE_ALIGN (type);
2270 else
2271 align = GET_MODE_ALIGNMENT (mode);
2272 if (align < PARM_BOUNDARY)
2273 align = PARM_BOUNDARY;
2274 if (align > 128)
2275 align = 128;
2276 return align;
2277}
2278
2279/* Return true if N is a possible register number of function value. */
2280bool
2281ix86_function_value_regno_p (regno)
2282 int regno;
2283{
2284 if (!TARGET_64BIT)
2285 {
2286 return ((regno) == 0
2287 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2288 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2289 }
2290 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2291 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2292 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2293}
2294
2295/* Define how to find the value returned by a function.
2296 VALTYPE is the data type of the value (as a tree).
2297 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2298 otherwise, FUNC is 0. */
2299rtx
2300ix86_function_value (valtype)
2301 tree valtype;
2302{
2303 if (TARGET_64BIT)
2304 {
2305 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2306 REGPARM_MAX, SSE_REGPARM_MAX,
2307 x86_64_int_return_registers, 0);
2308 /* For zero sized structures, construct_continer return NULL, but we need
2309 to keep rest of compiler happy by returning meaningfull value. */
2310 if (!ret)
2311 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2312 return ret;
2313 }
2314 else
b069de3b
SS
2315 return gen_rtx_REG (TYPE_MODE (valtype),
2316 ix86_value_regno (TYPE_MODE (valtype)));
53c17031
JH
2317}
2318
f5143c46 2319/* Return false iff type is returned in memory. */
53c17031
JH
2320int
2321ix86_return_in_memory (type)
2322 tree type;
2323{
2324 int needed_intregs, needed_sseregs;
2325 if (TARGET_64BIT)
2326 {
2327 return !examine_argument (TYPE_MODE (type), type, 1,
2328 &needed_intregs, &needed_sseregs);
2329 }
2330 else
2331 {
2332 if (TYPE_MODE (type) == BLKmode
2333 || (VECTOR_MODE_P (TYPE_MODE (type))
2334 && int_size_in_bytes (type) == 8)
2335 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2336 && TYPE_MODE (type) != TFmode
2337 && !VECTOR_MODE_P (TYPE_MODE (type))))
2338 return 1;
2339 return 0;
2340 }
2341}
2342
2343/* Define how to find the value returned by a library function
2344 assuming the value has mode MODE. */
2345rtx
2346ix86_libcall_value (mode)
2347 enum machine_mode mode;
2348{
2349 if (TARGET_64BIT)
2350 {
2351 switch (mode)
2352 {
2353 case SFmode:
2354 case SCmode:
2355 case DFmode:
2356 case DCmode:
2357 return gen_rtx_REG (mode, FIRST_SSE_REG);
2358 case TFmode:
2359 case TCmode:
2360 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2361 default:
2362 return gen_rtx_REG (mode, 0);
2363 }
2364 }
2365 else
b069de3b
SS
2366 return gen_rtx_REG (mode, ix86_value_regno (mode));
2367}
2368
2369/* Given a mode, return the register to use for a return value. */
2370
2371static int
2372ix86_value_regno (mode)
2373 enum machine_mode mode;
2374{
2375 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2376 return FIRST_FLOAT_REG;
2377 if (mode == TImode || VECTOR_MODE_P (mode))
2378 return FIRST_SSE_REG;
2379 return 0;
53c17031 2380}
ad919812
JH
2381\f
2382/* Create the va_list data type. */
53c17031 2383
ad919812
JH
2384tree
2385ix86_build_va_list ()
2386{
2387 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
53c17031 2388
ad919812
JH
2389 /* For i386 we use plain pointer to argument area. */
2390 if (!TARGET_64BIT)
2391 return build_pointer_type (char_type_node);
2392
f1e639b1 2393 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
ad919812
JH
2394 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2395
fce5a9f2 2396 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
ad919812 2397 unsigned_type_node);
fce5a9f2 2398 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
ad919812
JH
2399 unsigned_type_node);
2400 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2401 ptr_type_node);
2402 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2403 ptr_type_node);
2404
2405 DECL_FIELD_CONTEXT (f_gpr) = record;
2406 DECL_FIELD_CONTEXT (f_fpr) = record;
2407 DECL_FIELD_CONTEXT (f_ovf) = record;
2408 DECL_FIELD_CONTEXT (f_sav) = record;
2409
2410 TREE_CHAIN (record) = type_decl;
2411 TYPE_NAME (record) = type_decl;
2412 TYPE_FIELDS (record) = f_gpr;
2413 TREE_CHAIN (f_gpr) = f_fpr;
2414 TREE_CHAIN (f_fpr) = f_ovf;
2415 TREE_CHAIN (f_ovf) = f_sav;
2416
2417 layout_type (record);
2418
2419 /* The correct type is an array type of one element. */
2420 return build_array_type (record, build_index_type (size_zero_node));
2421}
2422
2423/* Perform any needed actions needed for a function that is receiving a
fce5a9f2 2424 variable number of arguments.
ad919812
JH
2425
2426 CUM is as above.
2427
2428 MODE and TYPE are the mode and type of the current parameter.
2429
2430 PRETEND_SIZE is a variable that should be set to the amount of stack
2431 that must be pushed by the prolog to pretend that our caller pushed
2432 it.
2433
2434 Normally, this macro will push all remaining incoming registers on the
2435 stack and set PRETEND_SIZE to the length of the registers pushed. */
2436
2437void
2438ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2439 CUMULATIVE_ARGS *cum;
2440 enum machine_mode mode;
2441 tree type;
2442 int *pretend_size ATTRIBUTE_UNUSED;
2443 int no_rtl;
2444
2445{
2446 CUMULATIVE_ARGS next_cum;
2447 rtx save_area = NULL_RTX, mem;
2448 rtx label;
2449 rtx label_ref;
2450 rtx tmp_reg;
2451 rtx nsse_reg;
2452 int set;
2453 tree fntype;
2454 int stdarg_p;
2455 int i;
2456
2457 if (!TARGET_64BIT)
2458 return;
2459
2460 /* Indicate to allocate space on the stack for varargs save area. */
2461 ix86_save_varrargs_registers = 1;
2462
2463 fntype = TREE_TYPE (current_function_decl);
2464 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2465 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2466 != void_type_node));
2467
2468 /* For varargs, we do not want to skip the dummy va_dcl argument.
2469 For stdargs, we do want to skip the last named argument. */
2470 next_cum = *cum;
2471 if (stdarg_p)
2472 function_arg_advance (&next_cum, mode, type, 1);
2473
2474 if (!no_rtl)
2475 save_area = frame_pointer_rtx;
2476
2477 set = get_varargs_alias_set ();
2478
2479 for (i = next_cum.regno; i < ix86_regparm; i++)
2480 {
2481 mem = gen_rtx_MEM (Pmode,
2482 plus_constant (save_area, i * UNITS_PER_WORD));
0692acba 2483 set_mem_alias_set (mem, set);
ad919812
JH
2484 emit_move_insn (mem, gen_rtx_REG (Pmode,
2485 x86_64_int_parameter_registers[i]));
2486 }
2487
2488 if (next_cum.sse_nregs)
2489 {
2490 /* Now emit code to save SSE registers. The AX parameter contains number
2491 of SSE parameter regsiters used to call this function. We use
2492 sse_prologue_save insn template that produces computed jump across
2493 SSE saves. We need some preparation work to get this working. */
2494
2495 label = gen_label_rtx ();
2496 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2497
2498 /* Compute address to jump to :
2499 label - 5*eax + nnamed_sse_arguments*5 */
2500 tmp_reg = gen_reg_rtx (Pmode);
2501 nsse_reg = gen_reg_rtx (Pmode);
2502 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2503 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
44cf5b6a 2504 gen_rtx_MULT (Pmode, nsse_reg,
ad919812
JH
2505 GEN_INT (4))));
2506 if (next_cum.sse_regno)
2507 emit_move_insn
2508 (nsse_reg,
2509 gen_rtx_CONST (DImode,
2510 gen_rtx_PLUS (DImode,
2511 label_ref,
2512 GEN_INT (next_cum.sse_regno * 4))));
2513 else
2514 emit_move_insn (nsse_reg, label_ref);
2515 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2516
2517 /* Compute address of memory block we save into. We always use pointer
2518 pointing 127 bytes after first byte to store - this is needed to keep
2519 instruction size limited by 4 bytes. */
2520 tmp_reg = gen_reg_rtx (Pmode);
8ac61af7
RK
2521 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2522 plus_constant (save_area,
2523 8 * REGPARM_MAX + 127)));
ad919812 2524 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
14f73b5a 2525 set_mem_alias_set (mem, set);
8ac61af7 2526 set_mem_align (mem, BITS_PER_WORD);
ad919812
JH
2527
2528 /* And finally do the dirty job! */
8ac61af7
RK
2529 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2530 GEN_INT (next_cum.sse_regno), label));
ad919812
JH
2531 }
2532
2533}
2534
2535/* Implement va_start.

   VALIST is the va_list expression to initialize.  NEXTARG is only
   used on targets that are not TARGET_64BIT, where the work is handed
   to std_expand_builtin_va_start.  For TARGET_64BIT the four fields of
   the va_list record are assigned here from the counters kept in
   current_function_args_info.  */
2536
2537void
e5faf155 2538ix86_va_start (valist, nextarg)
ad919812
JH
2539 tree valist;
2540 rtx nextarg;
2541{
2542 HOST_WIDE_INT words, n_gpr, n_fpr;
2543 tree f_gpr, f_fpr, f_ovf, f_sav;
2544 tree gpr, fpr, ovf, sav, t;
2545
2546 /* Only 64bit target needs something special. */
2547 if (!TARGET_64BIT)
2548 {
e5faf155 2549 std_expand_builtin_va_start (valist, nextarg);
ad919812
JH
2550 return;
2551 }
2552
/* Pick up the four FIELD_DECLs of the va_list record in chain order.  */
2553 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2554 f_fpr = TREE_CHAIN (f_gpr);
2555 f_ovf = TREE_CHAIN (f_fpr);
2556 f_sav = TREE_CHAIN (f_ovf);
2557
/* Dereference VALIST and build a COMPONENT_REF for each field.  */
2558 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2559 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2560 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2561 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2562 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2563
2564 /* Count number of gp and fp argument registers used. */
2565 words = current_function_args_info.words;
2566 n_gpr = current_function_args_info.regno;
2567 n_fpr = current_function_args_info.sse_regno;
2568
2569 if (TARGET_DEBUG_ARG)
2570 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
b531087a 2571 (int) words, (int) n_gpr, (int) n_fpr);
ad919812
JH
2572
/* The gpr field is set to 8 bytes per integer register already used.  */
2573 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2574 build_int_2 (n_gpr * 8, 0));
2575 TREE_SIDE_EFFECTS (t) = 1;
2576 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2577
/* The fpr field is set to 16 bytes per SSE register already used, biased
   by 8*REGPARM_MAX (the same bias ix86_va_arg applies when it compares
   the fpr field against its limit).  */
2578 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2579 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2580 TREE_SIDE_EFFECTS (t) = 1;
2581 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2582
2583 /* Find the overflow area: virtual_incoming_args_rtx advanced by
   WORDS words (the addition is skipped when WORDS is zero). */
2584 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2585 if (words != 0)
2586 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2587 build_int_2 (words * UNITS_PER_WORD, 0));
2588 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2589 TREE_SIDE_EFFECTS (t) = 1;
2590 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2591
2592 /* Find the register save area.
2593 The function prologue saves it right above the stack frame, so the
   sav field is simply set to frame_pointer_rtx. */
2594 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2595 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2596 TREE_SIDE_EFFECTS (t) = 1;
2597 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2598}
2599
2600/* Implement va_arg.

   VALIST is the va_list expression and TYPE the type of the argument
   being fetched.  The return value is an rtx holding the address of the
   argument: either a fresh Pmode pseudo or the address of a stack
   temporary obtained from assign_temp.

   Targets that are not TARGET_64BIT defer to std_expand_builtin_va_arg.
   Otherwise, when construct_container returns a container for TYPE,
   code is emitted that first tries the register save area and branches
   to the overflow-area code when the gpr/fpr fields show that not
   enough registers remain.  */
2601rtx
2602ix86_va_arg (valist, type)
2603 tree valist, type;
2604{
0139adca 2605 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
ad919812
JH
2606 tree f_gpr, f_fpr, f_ovf, f_sav;
2607 tree gpr, fpr, ovf, sav, t;
b932f770 2608 int size, rsize;
ad919812
JH
2609 rtx lab_false, lab_over = NULL_RTX;
2610 rtx addr_rtx, r;
2611 rtx container;
2612
2613 /* Only 64bit target needs something special. */
2614 if (!TARGET_64BIT)
2615 {
2616 return std_expand_builtin_va_arg (valist, type);
2617 }
2618
/* Pick up the four FIELD_DECLs of the va_list record in chain order.  */
2619 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2620 f_fpr = TREE_CHAIN (f_gpr);
2621 f_ovf = TREE_CHAIN (f_fpr);
2622 f_sav = TREE_CHAIN (f_ovf);
2623
2624 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2625 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2626 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2627 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2628 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2629
/* SIZE is in bytes; RSIZE is SIZE rounded up to whole words.  */
2630 size = int_size_in_bytes (type);
2631 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2632
2633 container = construct_container (TYPE_MODE (type), type, 0,
2634 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2635 /*
2636 * Pull the value out of the saved registers ...
2637 */
2638
2639 addr_rtx = gen_reg_rtx (Pmode);
2640
2641 if (container)
2642 {
2643 rtx int_addr_rtx, sse_addr_rtx;
2644 int needed_intregs, needed_sseregs;
2645 int need_temp;
2646
2647 lab_over = gen_label_rtx ();
2648 lab_false = gen_label_rtx ();
8bad7136 2649
ad919812
JH
2650 examine_argument (TYPE_MODE (type), type, 0,
2651 &needed_intregs, &needed_sseregs);
2652
2653
/* A temporary is needed when TYPE is aligned to more than 64 bits and
   uses integer registers, or to more than 128 bits in any case.  */
2654 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2655 || TYPE_ALIGN (type) > 128);
2656
2657 /* In case we are passing a structure, verify that it is a consecutive block
2658 in the register save area. If not, we need to do moves. */
2659 if (!need_temp && !REG_P (container))
2660 {
2661 /* Verify that all registers are strictly consecutive. */
2662 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2663 {
2664 int i;
2665
2666 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2667 {
2668 rtx slot = XVECEXP (container, 0, i);
b531087a 2669 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
ad919812
JH
2670 || INTVAL (XEXP (slot, 1)) != i * 16)
2671 need_temp = 1;
2672 }
2673 }
2674 else
2675 {
2676 int i;
2677
2678 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2679 {
2680 rtx slot = XVECEXP (container, 0, i);
b531087a 2681 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
ad919812
JH
2682 || INTVAL (XEXP (slot, 1)) != i * 8)
2683 need_temp = 1;
2684 }
2685 }
2686 }
/* Without a temporary the save-area address is the result itself;
   otherwise separate pseudos hold the integer and SSE source addresses.  */
2687 if (!need_temp)
2688 {
2689 int_addr_rtx = addr_rtx;
2690 sse_addr_rtx = addr_rtx;
2691 }
2692 else
2693 {
2694 int_addr_rtx = gen_reg_rtx (Pmode);
2695 sse_addr_rtx = gen_reg_rtx (Pmode);
2696 }
2697 /* First ensure that we fit completely in registers. */
2698 if (needed_intregs)
2699 {
2700 emit_cmp_and_jump_insns (expand_expr
2701 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2702 GEN_INT ((REGPARM_MAX - needed_intregs +
2703 1) * 8), GE, const1_rtx, SImode,
d43e0b7d 2704 1, lab_false);
ad919812
JH
2705 }
2706 if (needed_sseregs)
2707 {
2708 emit_cmp_and_jump_insns (expand_expr
2709 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2710 GEN_INT ((SSE_REGPARM_MAX -
2711 needed_sseregs + 1) * 16 +
2712 REGPARM_MAX * 8), GE, const1_rtx,
d43e0b7d 2713 SImode, 1, lab_false);
ad919812
JH
2714 }
2715
2716 /* Compute index to start of area used for integer regs. */
2717 if (needed_intregs)
2718 {
2719 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2720 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2721 if (r != int_addr_rtx)
2722 emit_move_insn (int_addr_rtx, r);
2723 }
2724 if (needed_sseregs)
2725 {
2726 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2727 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2728 if (r != sse_addr_rtx)
2729 emit_move_insn (sse_addr_rtx, r);
2730 }
2731 if (need_temp)
2732 {
2733 int i;
2734 rtx mem;
2735
b932f770
JH
2736 /* Never use the memory itself, as it has the alias set. */
2737 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2738 mem = gen_rtx_MEM (BLKmode, addr_rtx);
0692acba 2739 set_mem_alias_set (mem, get_varargs_alias_set ());
8ac61af7 2740 set_mem_align (mem, BITS_PER_UNIT);
b932f770 2741
/* Copy each piece of the container from the save area into the
   temporary at the offset recorded in the container slot.  */
ad919812
JH
2742 for (i = 0; i < XVECLEN (container, 0); i++)
2743 {
2744 rtx slot = XVECEXP (container, 0, i);
2745 rtx reg = XEXP (slot, 0);
2746 enum machine_mode mode = GET_MODE (reg);
2747 rtx src_addr;
2748 rtx src_mem;
2749 int src_offset;
2750 rtx dest_mem;
2751
2752 if (SSE_REGNO_P (REGNO (reg)))
2753 {
2754 src_addr = sse_addr_rtx;
2755 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2756 }
2757 else
2758 {
2759 src_addr = int_addr_rtx;
2760 src_offset = REGNO (reg) * 8;
2761 }
2762 src_mem = gen_rtx_MEM (mode, src_addr);
0692acba 2763 set_mem_alias_set (src_mem, get_varargs_alias_set ());
ad919812
JH
2764 src_mem = adjust_address (src_mem, mode, src_offset);
2765 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
ad919812
JH
2766 emit_move_insn (dest_mem, src_mem);
2767 }
2768 }
2769
/* Advance the gpr and fpr fields past the registers just consumed.  */
2770 if (needed_intregs)
2771 {
2772 t =
2773 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2774 build_int_2 (needed_intregs * 8, 0));
2775 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2776 TREE_SIDE_EFFECTS (t) = 1;
2777 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2778 }
2779 if (needed_sseregs)
2780 {
2781 t =
2782 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2783 build_int_2 (needed_sseregs * 16, 0));
2784 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2785 TREE_SIDE_EFFECTS (t) = 1;
2786 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2787 }
2788
/* Register path done: skip the overflow-area code below.  */
2789 emit_jump_insn (gen_jump (lab_over));
2790 emit_barrier ();
2791 emit_label (lab_false);
2792 }
2793
2794 /* ... otherwise out of the overflow area. */
2795
2796 /* Care for on-stack alignment if needed. */
2797 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2798 t = ovf;
2799 else
2800 {
/* Round the overflow pointer up to the argument boundary (in bytes).  */
2801 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2802 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2803 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2804 }
2805 t = save_expr (t);
2806
2807 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2808 if (r != addr_rtx)
2809 emit_move_insn (addr_rtx, r);
2810
/* Advance the overflow pointer by RSIZE words past the argument.  */
2811 t =
2812 build (PLUS_EXPR, TREE_TYPE (t), t,
2813 build_int_2 (rsize * UNITS_PER_WORD, 0));
2814 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2815 TREE_SIDE_EFFECTS (t) = 1;
2816 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2817
2818 if (container)
2819 emit_label (lab_over);
2820
ad919812
JH
2821 return addr_rtx;
2822}
2823\f
c3c637e3
GS
2824/* Return nonzero if OP is either a i387 or SSE fp register. */
2825int
2826any_fp_register_operand (op, mode)
2827 rtx op;
2828 enum machine_mode mode ATTRIBUTE_UNUSED;
2829{
2830 return ANY_FP_REG_P (op);
2831}
2832
2833/* Return nonzero if OP is an i387 fp register. */
2834int
2835fp_register_operand (op, mode)
2836 rtx op;
2837 enum machine_mode mode ATTRIBUTE_UNUSED;
2838{
2839 return FP_REG_P (op);
2840}
2841
2842/* Return nonzero if OP is a non-fp register_operand. */
2843int
2844register_and_not_any_fp_reg_operand (op, mode)
2845 rtx op;
2846 enum machine_mode mode;
2847{
2848 return register_operand (op, mode) && !ANY_FP_REG_P (op);
2849}
2850
2851/* Return nonzero of OP is a register operand other than an
2852 i387 fp register. */
2853int
2854register_and_not_fp_reg_operand (op, mode)
2855 rtx op;
2856 enum machine_mode mode;
2857{
2858 return register_operand (op, mode) && !FP_REG_P (op);
2859}
2860
7dd4b4a3
JH
2861/* Return nonzero if OP is general operand representable on x86_64. */
2862
2863int
2864x86_64_general_operand (op, mode)
2865 rtx op;
2866 enum machine_mode mode;
2867{
2868 if (!TARGET_64BIT)
2869 return general_operand (op, mode);
2870 if (nonimmediate_operand (op, mode))
2871 return 1;
2872 return x86_64_sign_extended_value (op);
2873}
2874
2875/* Return nonzero if OP is general operand representable on x86_64
d6a7951f 2876 as either sign extended or zero extended constant. */
7dd4b4a3
JH
2877
2878int
2879x86_64_szext_general_operand (op, mode)
2880 rtx op;
2881 enum machine_mode mode;
2882{
2883 if (!TARGET_64BIT)
2884 return general_operand (op, mode);
2885 if (nonimmediate_operand (op, mode))
2886 return 1;
2887 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2888}
2889
2890/* Return nonzero if OP is nonmemory operand representable on x86_64. */
2891
2892int
2893x86_64_nonmemory_operand (op, mode)
2894 rtx op;
2895 enum machine_mode mode;
2896{
2897 if (!TARGET_64BIT)
2898 return nonmemory_operand (op, mode);
2899 if (register_operand (op, mode))
2900 return 1;
2901 return x86_64_sign_extended_value (op);
2902}
2903
2904/* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2905
2906int
2907x86_64_movabs_operand (op, mode)
2908 rtx op;
2909 enum machine_mode mode;
2910{
2911 if (!TARGET_64BIT || !flag_pic)
2912 return nonmemory_operand (op, mode);
2913 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2914 return 1;
2915 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2916 return 1;
2917 return 0;
2918}
2919
2920/* Return nonzero if OP is nonmemory operand representable on x86_64. */
2921
2922int
2923x86_64_szext_nonmemory_operand (op, mode)
2924 rtx op;
2925 enum machine_mode mode;
2926{
2927 if (!TARGET_64BIT)
2928 return nonmemory_operand (op, mode);
2929 if (register_operand (op, mode))
2930 return 1;
2931 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2932}
2933
2934/* Return nonzero if OP is immediate operand representable on x86_64. */
2935
2936int
2937x86_64_immediate_operand (op, mode)
2938 rtx op;
2939 enum machine_mode mode;
2940{
2941 if (!TARGET_64BIT)
2942 return immediate_operand (op, mode);
2943 return x86_64_sign_extended_value (op);
2944}
2945
2946/* Return nonzero if OP is immediate operand representable on x86_64. */
2947
2948int
2949x86_64_zext_immediate_operand (op, mode)
2950 rtx op;
2951 enum machine_mode mode ATTRIBUTE_UNUSED;
2952{
2953 return x86_64_zero_extended_value (op);
2954}
2955
8bad7136
JL
2956/* Return nonzero if OP is (const_int 1), else return zero. */
2957
2958int
2959const_int_1_operand (op, mode)
2960 rtx op;
2961 enum machine_mode mode ATTRIBUTE_UNUSED;
2962{
2963 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2964}
2965
794a292d
JJ
2966/* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2967 for shift & compare patterns, as shifting by 0 does not change flags),
2968 else return zero. */
2969
2970int
2971const_int_1_31_operand (op, mode)
2972 rtx op;
2973 enum machine_mode mode ATTRIBUTE_UNUSED;
2974{
2975 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
2976}
2977
e075ae69
RH
2978/* Returns 1 if OP is either a symbol reference or a sum of a symbol
2979 reference and a constant. */
b08de47e
MM
2980
2981int
e075ae69
RH
2982symbolic_operand (op, mode)
2983 register rtx op;
2984 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 2985{
e075ae69 2986 switch (GET_CODE (op))
2a2ab3f9 2987 {
e075ae69
RH
2988 case SYMBOL_REF:
2989 case LABEL_REF:
2990 return 1;
2991
2992 case CONST:
2993 op = XEXP (op, 0);
2994 if (GET_CODE (op) == SYMBOL_REF
2995 || GET_CODE (op) == LABEL_REF
2996 || (GET_CODE (op) == UNSPEC
8ee41eaf
RH
2997 && (XINT (op, 1) == UNSPEC_GOT
2998 || XINT (op, 1) == UNSPEC_GOTOFF
2999 || XINT (op, 1) == UNSPEC_GOTPCREL)))
e075ae69
RH
3000 return 1;
3001 if (GET_CODE (op) != PLUS
3002 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3003 return 0;
3004
3005 op = XEXP (op, 0);
3006 if (GET_CODE (op) == SYMBOL_REF
3007 || GET_CODE (op) == LABEL_REF)
3008 return 1;
3009 /* Only @GOTOFF gets offsets. */
3010 if (GET_CODE (op) != UNSPEC
8ee41eaf 3011 || XINT (op, 1) != UNSPEC_GOTOFF)
e075ae69
RH
3012 return 0;
3013
3014 op = XVECEXP (op, 0, 0);
3015 if (GET_CODE (op) == SYMBOL_REF
3016 || GET_CODE (op) == LABEL_REF)
3017 return 1;
3018 return 0;
3019
3020 default:
3021 return 0;
2a2ab3f9
JVA
3022 }
3023}
2a2ab3f9 3024
e075ae69 3025/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 3026
e075ae69
RH
3027int
3028pic_symbolic_operand (op, mode)
3029 register rtx op;
3030 enum machine_mode mode ATTRIBUTE_UNUSED;
3031{
6eb791fc
JH
3032 if (GET_CODE (op) != CONST)
3033 return 0;
3034 op = XEXP (op, 0);
3035 if (TARGET_64BIT)
3036 {
3037 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3038 return 1;
3039 }
fce5a9f2 3040 else
2a2ab3f9 3041 {
e075ae69
RH
3042 if (GET_CODE (op) == UNSPEC)
3043 return 1;
3044 if (GET_CODE (op) != PLUS
3045 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3046 return 0;
3047 op = XEXP (op, 0);
3048 if (GET_CODE (op) == UNSPEC)
3049 return 1;
2a2ab3f9 3050 }
e075ae69 3051 return 0;
2a2ab3f9 3052}
2a2ab3f9 3053
623fe810
RH
3054/* Return true if OP is a symbolic operand that resolves locally. */
3055
3056static int
3057local_symbolic_operand (op, mode)
3058 rtx op;
3059 enum machine_mode mode ATTRIBUTE_UNUSED;
3060{
3061 if (GET_CODE (op) == LABEL_REF)
3062 return 1;
3063
3064 if (GET_CODE (op) == CONST
3065 && GET_CODE (XEXP (op, 0)) == PLUS
3066 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3067 op = XEXP (XEXP (op, 0), 0);
3068
3069 if (GET_CODE (op) != SYMBOL_REF)
3070 return 0;
3071
3072 /* These we've been told are local by varasm and encode_section_info
3073 respectively. */
3074 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3075 return 1;
3076
3077 /* There is, however, a not insubstantial body of code in the rest of
fce5a9f2 3078 the compiler that assumes it can just stick the results of
623fe810
RH
3079 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3080 /* ??? This is a hack. Should update the body of the compiler to
fb49053f 3081 always create a DECL an invoke targetm.encode_section_info. */
623fe810
RH
3082 if (strncmp (XSTR (op, 0), internal_label_prefix,
3083 internal_label_prefix_len) == 0)
3084 return 1;
3085
3086 return 0;
3087}
3088
f996902d
RH
3089/* Test for various thread-local symbols. See ix86_encode_section_info. */
3090
3091int
3092tls_symbolic_operand (op, mode)
3093 register rtx op;
3094 enum machine_mode mode ATTRIBUTE_UNUSED;
3095{
3096 const char *symbol_str;
3097
3098 if (GET_CODE (op) != SYMBOL_REF)
3099 return 0;
3100 symbol_str = XSTR (op, 0);
3101
3102 if (symbol_str[0] != '%')
3103 return 0;
755ac5d4 3104 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
f996902d
RH
3105}
3106
3107static int
3108tls_symbolic_operand_1 (op, kind)
3109 rtx op;
3110 enum tls_model kind;
3111{
3112 const char *symbol_str;
3113
3114 if (GET_CODE (op) != SYMBOL_REF)
3115 return 0;
3116 symbol_str = XSTR (op, 0);
3117
3118 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3119}
3120
3121int
3122global_dynamic_symbolic_operand (op, mode)
3123 register rtx op;
3124 enum machine_mode mode ATTRIBUTE_UNUSED;
3125{
3126 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3127}
3128
3129int
3130local_dynamic_symbolic_operand (op, mode)
3131 register rtx op;
3132 enum machine_mode mode ATTRIBUTE_UNUSED;
3133{
3134 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3135}
3136
3137int
3138initial_exec_symbolic_operand (op, mode)
3139 register rtx op;
3140 enum machine_mode mode ATTRIBUTE_UNUSED;
3141{
3142 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3143}
3144
3145int
3146local_exec_symbolic_operand (op, mode)
3147 register rtx op;
3148 enum machine_mode mode ATTRIBUTE_UNUSED;
3149{
3150 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3151}
3152
28d52ffb
RH
3153/* Test for a valid operand for a call instruction. Don't allow the
3154 arg pointer register or virtual regs since they may decay into
3155 reg + const, which the patterns can't handle. */
2a2ab3f9 3156
e075ae69
RH
3157int
3158call_insn_operand (op, mode)
3159 rtx op;
3160 enum machine_mode mode ATTRIBUTE_UNUSED;
3161{
e075ae69
RH
3162 /* Disallow indirect through a virtual register. This leads to
3163 compiler aborts when trying to eliminate them. */
3164 if (GET_CODE (op) == REG
3165 && (op == arg_pointer_rtx
564d80f4 3166 || op == frame_pointer_rtx
e075ae69
RH
3167 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3168 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3169 return 0;
2a2ab3f9 3170
28d52ffb
RH
3171 /* Disallow `call 1234'. Due to varying assembler lameness this
3172 gets either rejected or translated to `call .+1234'. */
3173 if (GET_CODE (op) == CONST_INT)
3174 return 0;
3175
cbbf65e0
RH
3176 /* Explicitly allow SYMBOL_REF even if pic. */
3177 if (GET_CODE (op) == SYMBOL_REF)
e075ae69 3178 return 1;
2a2ab3f9 3179
cbbf65e0
RH
3180 /* Otherwise we can allow any general_operand in the address. */
3181 return general_operand (op, Pmode);
e075ae69 3182}
79325812 3183
e075ae69
RH
3184int
3185constant_call_address_operand (op, mode)
3186 rtx op;
3187 enum machine_mode mode ATTRIBUTE_UNUSED;
3188{
eaf19aba
JJ
3189 if (GET_CODE (op) == CONST
3190 && GET_CODE (XEXP (op, 0)) == PLUS
3191 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3192 op = XEXP (XEXP (op, 0), 0);
e1ff012c 3193 return GET_CODE (op) == SYMBOL_REF;
e075ae69 3194}
2a2ab3f9 3195
e075ae69 3196/* Match exactly zero and one. */
e9a25f70 3197
0f290768 3198int
e075ae69
RH
3199const0_operand (op, mode)
3200 register rtx op;
3201 enum machine_mode mode;
3202{
3203 return op == CONST0_RTX (mode);
3204}
e9a25f70 3205
0f290768 3206int
e075ae69
RH
3207const1_operand (op, mode)
3208 register rtx op;
3209 enum machine_mode mode ATTRIBUTE_UNUSED;
3210{
3211 return op == const1_rtx;
3212}
2a2ab3f9 3213
e075ae69 3214/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 3215
e075ae69
RH
3216int
3217const248_operand (op, mode)
3218 register rtx op;
3219 enum machine_mode mode ATTRIBUTE_UNUSED;
3220{
3221 return (GET_CODE (op) == CONST_INT
3222 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3223}
e9a25f70 3224
e075ae69 3225/* True if this is a constant appropriate for an increment or decremenmt. */
81fd0956 3226
e075ae69
RH
3227int
3228incdec_operand (op, mode)
3229 register rtx op;
0631e0bf 3230 enum machine_mode mode ATTRIBUTE_UNUSED;
e075ae69 3231{
f5143c46 3232 /* On Pentium4, the inc and dec operations causes extra dependency on flag
b4e89e2d
JH
3233 registers, since carry flag is not set. */
3234 if (TARGET_PENTIUM4 && !optimize_size)
3235 return 0;
2b1c08f5 3236 return op == const1_rtx || op == constm1_rtx;
e075ae69 3237}
2a2ab3f9 3238
371bc54b
JH
3239/* Return nonzero if OP is acceptable as operand of DImode shift
3240 expander. */
3241
3242int
3243shiftdi_operand (op, mode)
3244 rtx op;
3245 enum machine_mode mode ATTRIBUTE_UNUSED;
3246{
3247 if (TARGET_64BIT)
3248 return nonimmediate_operand (op, mode);
3249 else
3250 return register_operand (op, mode);
3251}
3252
0f290768 3253/* Return false if this is the stack pointer, or any other fake
e075ae69
RH
3254 register eliminable to the stack pointer. Otherwise, this is
3255 a register operand.
2a2ab3f9 3256
e075ae69
RH
3257 This is used to prevent esp from being used as an index reg.
3258 Which would only happen in pathological cases. */
5f1ec3e6 3259
e075ae69
RH
3260int
3261reg_no_sp_operand (op, mode)
3262 register rtx op;
3263 enum machine_mode mode;
3264{
3265 rtx t = op;
3266 if (GET_CODE (t) == SUBREG)
3267 t = SUBREG_REG (t);
564d80f4 3268 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 3269 return 0;
2a2ab3f9 3270
e075ae69 3271 return register_operand (op, mode);
2a2ab3f9 3272}
b840bfb0 3273
915119a5
BS
3274int
3275mmx_reg_operand (op, mode)
3276 register rtx op;
bd793c65 3277 enum machine_mode mode ATTRIBUTE_UNUSED;
915119a5
BS
3278{
3279 return MMX_REG_P (op);
3280}
3281
2c5a510c
RH
3282/* Return false if this is any eliminable register. Otherwise
3283 general_operand. */
3284
3285int
3286general_no_elim_operand (op, mode)
3287 register rtx op;
3288 enum machine_mode mode;
3289{
3290 rtx t = op;
3291 if (GET_CODE (t) == SUBREG)
3292 t = SUBREG_REG (t);
3293 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3294 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3295 || t == virtual_stack_dynamic_rtx)
3296 return 0;
1020a5ab
RH
3297 if (REG_P (t)
3298 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3299 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3300 return 0;
2c5a510c
RH
3301
3302 return general_operand (op, mode);
3303}
3304
3305/* Return false if this is any eliminable register. Otherwise
3306 register_operand or const_int. */
3307
3308int
3309nonmemory_no_elim_operand (op, mode)
3310 register rtx op;
3311 enum machine_mode mode;
3312{
3313 rtx t = op;
3314 if (GET_CODE (t) == SUBREG)
3315 t = SUBREG_REG (t);
3316 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3317 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3318 || t == virtual_stack_dynamic_rtx)
3319 return 0;
3320
3321 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3322}
3323
7ec70495
JH
3324/* Return false if this is any eliminable register or stack register,
3325 otherwise work like register_operand. */
3326
3327int
3328index_register_operand (op, mode)
3329 register rtx op;
3330 enum machine_mode mode;
3331{
3332 rtx t = op;
3333 if (GET_CODE (t) == SUBREG)
3334 t = SUBREG_REG (t);
3335 if (!REG_P (t))
3336 return 0;
3337 if (t == arg_pointer_rtx
3338 || t == frame_pointer_rtx
3339 || t == virtual_incoming_args_rtx
3340 || t == virtual_stack_vars_rtx
3341 || t == virtual_stack_dynamic_rtx
3342 || REGNO (t) == STACK_POINTER_REGNUM)
3343 return 0;
3344
3345 return general_operand (op, mode);
3346}
3347
e075ae69 3348/* Return true if op is a Q_REGS class register. */
b840bfb0 3349
e075ae69
RH
3350int
3351q_regs_operand (op, mode)
3352 register rtx op;
3353 enum machine_mode mode;
b840bfb0 3354{
e075ae69
RH
3355 if (mode != VOIDmode && GET_MODE (op) != mode)
3356 return 0;
3357 if (GET_CODE (op) == SUBREG)
3358 op = SUBREG_REG (op);
7799175f 3359 return ANY_QI_REG_P (op);
0f290768 3360}
b840bfb0 3361
e075ae69 3362/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 3363
e075ae69
RH
3364int
3365non_q_regs_operand (op, mode)
3366 register rtx op;
3367 enum machine_mode mode;
3368{
3369 if (mode != VOIDmode && GET_MODE (op) != mode)
3370 return 0;
3371 if (GET_CODE (op) == SUBREG)
3372 op = SUBREG_REG (op);
3373 return NON_QI_REG_P (op);
0f290768 3374}
b840bfb0 3375
915119a5
BS
3376/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3377 insns. */
3378int
3379sse_comparison_operator (op, mode)
3380 rtx op;
3381 enum machine_mode mode ATTRIBUTE_UNUSED;
3382{
3383 enum rtx_code code = GET_CODE (op);
a46d1d38
JH
3384 switch (code)
3385 {
3386 /* Operations supported directly. */
3387 case EQ:
3388 case LT:
3389 case LE:
3390 case UNORDERED:
3391 case NE:
3392 case UNGE:
3393 case UNGT:
3394 case ORDERED:
3395 return 1;
3396 /* These are equivalent to ones above in non-IEEE comparisons. */
3397 case UNEQ:
3398 case UNLT:
3399 case UNLE:
3400 case LTGT:
3401 case GE:
3402 case GT:
3403 return !TARGET_IEEE_FP;
3404 default:
3405 return 0;
3406 }
915119a5 3407}
9076b9c1 3408/* Return 1 if OP is a valid comparison operator in valid mode. */
e075ae69 3409int
9076b9c1
JH
3410ix86_comparison_operator (op, mode)
3411 register rtx op;
3412 enum machine_mode mode;
e075ae69 3413{
9076b9c1 3414 enum machine_mode inmode;
9a915772 3415 enum rtx_code code = GET_CODE (op);
3a3677ff
RH
3416 if (mode != VOIDmode && GET_MODE (op) != mode)
3417 return 0;
9a915772
JH
3418 if (GET_RTX_CLASS (code) != '<')
3419 return 0;
3420 inmode = GET_MODE (XEXP (op, 0));
3421
3422 if (inmode == CCFPmode || inmode == CCFPUmode)
3423 {
3424 enum rtx_code second_code, bypass_code;
3425 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3426 return (bypass_code == NIL && second_code == NIL);
3427 }
3428 switch (code)
3a3677ff
RH
3429 {
3430 case EQ: case NE:
3a3677ff 3431 return 1;
9076b9c1 3432 case LT: case GE:
7e08e190 3433 if (inmode == CCmode || inmode == CCGCmode
9076b9c1
JH
3434 || inmode == CCGOCmode || inmode == CCNOmode)
3435 return 1;
3436 return 0;
7e08e190 3437 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
7e08e190 3438 if (inmode == CCmode)
9076b9c1
JH
3439 return 1;
3440 return 0;
3441 case GT: case LE:
7e08e190 3442 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
9076b9c1
JH
3443 return 1;
3444 return 0;
3a3677ff
RH
3445 default:
3446 return 0;
3447 }
3448}
3449
9076b9c1 3450/* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3a3677ff 3451
9076b9c1
JH
3452int
3453fcmov_comparison_operator (op, mode)
3a3677ff
RH
3454 register rtx op;
3455 enum machine_mode mode;
3456{
b62d22a2 3457 enum machine_mode inmode;
9a915772 3458 enum rtx_code code = GET_CODE (op);
3a3677ff
RH
3459 if (mode != VOIDmode && GET_MODE (op) != mode)
3460 return 0;
9a915772
JH
3461 if (GET_RTX_CLASS (code) != '<')
3462 return 0;
3463 inmode = GET_MODE (XEXP (op, 0));
3464 if (inmode == CCFPmode || inmode == CCFPUmode)
3a3677ff 3465 {
9a915772
JH
3466 enum rtx_code second_code, bypass_code;
3467 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3468 if (bypass_code != NIL || second_code != NIL)
3469 return 0;
3470 code = ix86_fp_compare_code_to_integer (code);
3471 }
3472 /* i387 supports just limited amount of conditional codes. */
3473 switch (code)
3474 {
3475 case LTU: case GTU: case LEU: case GEU:
3476 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
9076b9c1
JH
3477 return 1;
3478 return 0;
9a915772
JH
3479 case ORDERED: case UNORDERED:
3480 case EQ: case NE:
3481 return 1;
3a3677ff
RH
3482 default:
3483 return 0;
3484 }
e075ae69 3485}
b840bfb0 3486
e9e80858
JH
3487/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3488
3489int
3490promotable_binary_operator (op, mode)
3491 register rtx op;
3492 enum machine_mode mode ATTRIBUTE_UNUSED;
3493{
3494 switch (GET_CODE (op))
3495 {
3496 case MULT:
3497 /* Modern CPUs have same latency for HImode and SImode multiply,
3498 but 386 and 486 do HImode multiply faster. */
3499 return ix86_cpu > PROCESSOR_I486;
3500 case PLUS:
3501 case AND:
3502 case IOR:
3503 case XOR:
3504 case ASHIFT:
3505 return 1;
3506 default:
3507 return 0;
3508 }
3509}
3510
e075ae69
RH
3511/* Nearly general operand, but accept any const_double, since we wish
3512 to be able to drop them into memory rather than have them get pulled
3513 into registers. */
b840bfb0 3514
2a2ab3f9 3515int
e075ae69
RH
3516cmp_fp_expander_operand (op, mode)
3517 register rtx op;
3518 enum machine_mode mode;
2a2ab3f9 3519{
e075ae69 3520 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 3521 return 0;
e075ae69 3522 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 3523 return 1;
e075ae69 3524 return general_operand (op, mode);
2a2ab3f9
JVA
3525}
3526
e075ae69 3527/* Match an SI or HImode register for a zero_extract. */
2a2ab3f9
JVA
3528
3529int
e075ae69 3530ext_register_operand (op, mode)
2a2ab3f9 3531 register rtx op;
bb5177ac 3532 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 3533{
3522082b 3534 int regno;
0d7d98ee
JH
3535 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3536 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
e075ae69 3537 return 0;
3522082b
JH
3538
3539 if (!register_operand (op, VOIDmode))
3540 return 0;
3541
3542 /* Be curefull to accept only registers having upper parts. */
3543 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3544 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
e075ae69
RH
3545}
3546
3547/* Return 1 if this is a valid binary floating-point operation.
0f290768 3548 OP is the expression matched, and MODE is its mode. */
e075ae69
RH
3549
3550int
3551binary_fp_operator (op, mode)
3552 register rtx op;
3553 enum machine_mode mode;
3554{
3555 if (mode != VOIDmode && mode != GET_MODE (op))
3556 return 0;
3557
2a2ab3f9
JVA
3558 switch (GET_CODE (op))
3559 {
e075ae69
RH
3560 case PLUS:
3561 case MINUS:
3562 case MULT:
3563 case DIV:
3564 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 3565
2a2ab3f9
JVA
3566 default:
3567 return 0;
3568 }
3569}
fee2770d 3570
e075ae69 3571int
b531087a 3572mult_operator (op, mode)
e075ae69
RH
3573 register rtx op;
3574 enum machine_mode mode ATTRIBUTE_UNUSED;
3575{
3576 return GET_CODE (op) == MULT;
3577}
3578
3579int
b531087a 3580div_operator (op, mode)
e075ae69
RH
3581 register rtx op;
3582 enum machine_mode mode ATTRIBUTE_UNUSED;
3583{
3584 return GET_CODE (op) == DIV;
3585}
0a726ef1
JL
3586
3587int
e075ae69
RH
3588arith_or_logical_operator (op, mode)
3589 rtx op;
3590 enum machine_mode mode;
0a726ef1 3591{
e075ae69
RH
3592 return ((mode == VOIDmode || GET_MODE (op) == mode)
3593 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3594 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
0a726ef1
JL
3595}
3596
e075ae69 3597/* Returns 1 if OP is memory operand with a displacement. */
fee2770d
RS
3598
3599int
e075ae69
RH
3600memory_displacement_operand (op, mode)
3601 register rtx op;
3602 enum machine_mode mode;
4f2c8ebb 3603{
e075ae69 3604 struct ix86_address parts;
e9a25f70 3605
e075ae69
RH
3606 if (! memory_operand (op, mode))
3607 return 0;
3608
3609 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3610 abort ();
3611
3612 return parts.disp != NULL_RTX;
4f2c8ebb
RS
3613}
3614
16189740 3615/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
e075ae69
RH
3616 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3617
3618 ??? It seems likely that this will only work because cmpsi is an
3619 expander, and no actual insns use this. */
4f2c8ebb
RS
3620
3621int
e075ae69
RH
3622cmpsi_operand (op, mode)
3623 rtx op;
3624 enum machine_mode mode;
fee2770d 3625{
b9b2c339 3626 if (nonimmediate_operand (op, mode))
e075ae69
RH
3627 return 1;
3628
3629 if (GET_CODE (op) == AND
3630 && GET_MODE (op) == SImode
3631 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3632 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3633 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3634 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3635 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3636 && GET_CODE (XEXP (op, 1)) == CONST_INT)
fee2770d 3637 return 1;
e9a25f70 3638
fee2770d
RS
3639 return 0;
3640}
d784886d 3641
e075ae69
RH
3642/* Returns 1 if OP is memory operand that can not be represented by the
3643 modRM array. */
d784886d
RK
3644
3645int
e075ae69 3646long_memory_operand (op, mode)
d784886d
RK
3647 register rtx op;
3648 enum machine_mode mode;
3649{
e075ae69 3650 if (! memory_operand (op, mode))
d784886d
RK
3651 return 0;
3652
e075ae69 3653 return memory_address_length (op) != 0;
d784886d 3654}
2247f6ed
JH
3655
3656/* Return nonzero if the rtx is known aligned. */
3657
3658int
3659aligned_operand (op, mode)
3660 rtx op;
3661 enum machine_mode mode;
3662{
3663 struct ix86_address parts;
3664
3665 if (!general_operand (op, mode))
3666 return 0;
3667
0f290768 3668 /* Registers and immediate operands are always "aligned". */
2247f6ed
JH
3669 if (GET_CODE (op) != MEM)
3670 return 1;
3671
0f290768 3672 /* Don't even try to do any aligned optimizations with volatiles. */
2247f6ed
JH
3673 if (MEM_VOLATILE_P (op))
3674 return 0;
3675
3676 op = XEXP (op, 0);
3677
3678 /* Pushes and pops are only valid on the stack pointer. */
3679 if (GET_CODE (op) == PRE_DEC
3680 || GET_CODE (op) == POST_INC)
3681 return 1;
3682
3683 /* Decode the address. */
3684 if (! ix86_decompose_address (op, &parts))
3685 abort ();
3686
1540f9eb
JH
3687 if (parts.base && GET_CODE (parts.base) == SUBREG)
3688 parts.base = SUBREG_REG (parts.base);
3689 if (parts.index && GET_CODE (parts.index) == SUBREG)
3690 parts.index = SUBREG_REG (parts.index);
3691
2247f6ed
JH
3692 /* Look for some component that isn't known to be aligned. */
3693 if (parts.index)
3694 {
3695 if (parts.scale < 4
bdb429a5 3696 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
2247f6ed
JH
3697 return 0;
3698 }
3699 if (parts.base)
3700 {
bdb429a5 3701 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
2247f6ed
JH
3702 return 0;
3703 }
3704 if (parts.disp)
3705 {
3706 if (GET_CODE (parts.disp) != CONST_INT
3707 || (INTVAL (parts.disp) & 3) != 0)
3708 return 0;
3709 }
3710
3711 /* Didn't find one -- this must be an aligned address. */
3712 return 1;
3713}
e075ae69
RH
3714\f
3715/* Return true if the constant is something that can be loaded with
3716 a special instruction. Only handle 0.0 and 1.0; others are less
3717 worthwhile. */
57dbca5e
BS
3718
3719int
e075ae69
RH
3720standard_80387_constant_p (x)
3721 rtx x;
57dbca5e 3722{
2b04e52b 3723 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
e075ae69 3724 return -1;
2b04e52b
JH
3725 /* Note that on the 80387, other constants, such as pi, that we should support
3726 too. On some machines, these are much slower to load as standard constant,
3727 than to load from doubles in memory. */
3728 if (x == CONST0_RTX (GET_MODE (x)))
3729 return 1;
3730 if (x == CONST1_RTX (GET_MODE (x)))
3731 return 2;
e075ae69 3732 return 0;
57dbca5e
BS
3733}
3734
2b04e52b
JH
3735/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3736 */
3737int
3738standard_sse_constant_p (x)
3739 rtx x;
3740{
3741 if (GET_CODE (x) != CONST_DOUBLE)
3742 return -1;
3743 return (x == CONST0_RTX (GET_MODE (x)));
3744}
3745
2a2ab3f9
JVA
3746/* Returns 1 if OP contains a symbol reference */
3747
3748int
3749symbolic_reference_mentioned_p (op)
3750 rtx op;
3751{
6f7d635c 3752 register const char *fmt;
2a2ab3f9
JVA
3753 register int i;
3754
3755 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3756 return 1;
3757
3758 fmt = GET_RTX_FORMAT (GET_CODE (op));
3759 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3760 {
3761 if (fmt[i] == 'E')
3762 {
3763 register int j;
3764
3765 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3766 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3767 return 1;
3768 }
e9a25f70 3769
2a2ab3f9
JVA
3770 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3771 return 1;
3772 }
3773
3774 return 0;
3775}
e075ae69
RH
3776
3777/* Return 1 if it is appropriate to emit `ret' instructions in the
3778 body of a function. Do this only if the epilogue is simple, needing a
3779 couple of insns. Prior to reloading, we can't tell how many registers
3780 must be saved, so return 0 then. Return 0 if there is no frame
3781 marker to de-allocate.
3782
3783 If NON_SAVING_SETJMP is defined and true, then it is not possible
3784 for the epilogue to be simple, so return 0. This is a special case
3785 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3786 until final, but jump_optimize may need to know sooner if a
3787 `return' is OK. */
32b5b1aa
SC
3788
3789int
e075ae69 3790ix86_can_use_return_insn_p ()
32b5b1aa 3791{
4dd2ac2c 3792 struct ix86_frame frame;
9a7372d6 3793
e075ae69
RH
3794#ifdef NON_SAVING_SETJMP
3795 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3796 return 0;
3797#endif
9a7372d6
RH
3798
3799 if (! reload_completed || frame_pointer_needed)
3800 return 0;
32b5b1aa 3801
9a7372d6
RH
3802 /* Don't allow more than 32 pop, since that's all we can do
3803 with one instruction. */
3804 if (current_function_pops_args
3805 && current_function_args_size >= 32768)
e075ae69 3806 return 0;
32b5b1aa 3807
4dd2ac2c
JH
3808 ix86_compute_frame_layout (&frame);
3809 return frame.to_allocate == 0 && frame.nregs == 0;
e075ae69 3810}
6189a572
JH
3811\f
3812/* Return 1 if VALUE can be stored in the sign extended immediate field. */
3813int
3814x86_64_sign_extended_value (value)
3815 rtx value;
3816{
3817 switch (GET_CODE (value))
3818 {
3819 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3820 to be at least 32 and this all acceptable constants are
3821 represented as CONST_INT. */
3822 case CONST_INT:
3823 if (HOST_BITS_PER_WIDE_INT == 32)
3824 return 1;
3825 else
3826 {
3827 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
fa9f36a1 3828 return trunc_int_for_mode (val, SImode) == val;
6189a572
JH
3829 }
3830 break;
3831
3832 /* For certain code models, the symbolic references are known to fit. */
3833 case SYMBOL_REF:
3834 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3835
3836 /* For certain code models, the code is near as well. */
3837 case LABEL_REF:
3838 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3839
3840 /* We also may accept the offsetted memory references in certain special
3841 cases. */
3842 case CONST:
3843 if (GET_CODE (XEXP (value, 0)) == UNSPEC
8ee41eaf 3844 && XINT (XEXP (value, 0), 1) == UNSPEC_GOTPCREL)
6189a572
JH
3845 return 1;
3846 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3847 {
3848 rtx op1 = XEXP (XEXP (value, 0), 0);
3849 rtx op2 = XEXP (XEXP (value, 0), 1);
3850 HOST_WIDE_INT offset;
3851
3852 if (ix86_cmodel == CM_LARGE)
3853 return 0;
3854 if (GET_CODE (op2) != CONST_INT)
3855 return 0;
3856 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3857 switch (GET_CODE (op1))
3858 {
3859 case SYMBOL_REF:
3860 /* For CM_SMALL assume that latest object is 1MB before
3861 end of 31bits boundary. We may also accept pretty
3862 large negative constants knowing that all objects are
3863 in the positive half of address space. */
3864 if (ix86_cmodel == CM_SMALL
3865 && offset < 1024*1024*1024
3866 && trunc_int_for_mode (offset, SImode) == offset)
3867 return 1;
3868 /* For CM_KERNEL we know that all object resist in the
3869 negative half of 32bits address space. We may not
3870 accept negative offsets, since they may be just off
d6a7951f 3871 and we may accept pretty large positive ones. */
6189a572
JH
3872 if (ix86_cmodel == CM_KERNEL
3873 && offset > 0
3874 && trunc_int_for_mode (offset, SImode) == offset)
3875 return 1;
3876 break;
3877 case LABEL_REF:
3878 /* These conditions are similar to SYMBOL_REF ones, just the
3879 constraints for code models differ. */
3880 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3881 && offset < 1024*1024*1024
3882 && trunc_int_for_mode (offset, SImode) == offset)
3883 return 1;
3884 if (ix86_cmodel == CM_KERNEL
3885 && offset > 0
3886 && trunc_int_for_mode (offset, SImode) == offset)
3887 return 1;
3888 break;
3889 default:
3890 return 0;
3891 }
3892 }
3893 return 0;
3894 default:
3895 return 0;
3896 }
3897}
3898
3899/* Return 1 if VALUE can be stored in the zero extended immediate field. */
3900int
3901x86_64_zero_extended_value (value)
3902 rtx value;
3903{
3904 switch (GET_CODE (value))
3905 {
3906 case CONST_DOUBLE:
3907 if (HOST_BITS_PER_WIDE_INT == 32)
3908 return (GET_MODE (value) == VOIDmode
3909 && !CONST_DOUBLE_HIGH (value));
3910 else
3911 return 0;
3912 case CONST_INT:
3913 if (HOST_BITS_PER_WIDE_INT == 32)
3914 return INTVAL (value) >= 0;
3915 else
b531087a 3916 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
6189a572
JH
3917 break;
3918
3919 /* For certain code models, the symbolic references are known to fit. */
3920 case SYMBOL_REF:
3921 return ix86_cmodel == CM_SMALL;
3922
3923 /* For certain code models, the code is near as well. */
3924 case LABEL_REF:
3925 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3926
3927 /* We also may accept the offsetted memory references in certain special
3928 cases. */
3929 case CONST:
3930 if (GET_CODE (XEXP (value, 0)) == PLUS)
3931 {
3932 rtx op1 = XEXP (XEXP (value, 0), 0);
3933 rtx op2 = XEXP (XEXP (value, 0), 1);
3934
3935 if (ix86_cmodel == CM_LARGE)
3936 return 0;
3937 switch (GET_CODE (op1))
3938 {
3939 case SYMBOL_REF:
3940 return 0;
d6a7951f 3941 /* For small code model we may accept pretty large positive
6189a572
JH
3942 offsets, since one bit is available for free. Negative
3943 offsets are limited by the size of NULL pointer area
3944 specified by the ABI. */
3945 if (ix86_cmodel == CM_SMALL
3946 && GET_CODE (op2) == CONST_INT
3947 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3948 && (trunc_int_for_mode (INTVAL (op2), SImode)
3949 == INTVAL (op2)))
3950 return 1;
3951 /* ??? For the kernel, we may accept adjustment of
3952 -0x10000000, since we know that it will just convert
d6a7951f 3953 negative address space to positive, but perhaps this
6189a572
JH
3954 is not worthwhile. */
3955 break;
3956 case LABEL_REF:
3957 /* These conditions are similar to SYMBOL_REF ones, just the
3958 constraints for code models differ. */
3959 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3960 && GET_CODE (op2) == CONST_INT
3961 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3962 && (trunc_int_for_mode (INTVAL (op2), SImode)
3963 == INTVAL (op2)))
3964 return 1;
3965 break;
3966 default:
3967 return 0;
3968 }
3969 }
3970 return 0;
3971 default:
3972 return 0;
3973 }
3974}
6fca22eb
RH
3975
3976/* Value should be nonzero if functions must have frame pointers.
3977 Zero means the frame pointer need not be set up (and parms may
3978 be accessed via the stack pointer) in functions that seem suitable. */
3979
3980int
3981ix86_frame_pointer_required ()
3982{
3983 /* If we accessed previous frames, then the generated code expects
3984 to be able to access the saved ebp value in our frame. */
3985 if (cfun->machine->accesses_prev_frame)
3986 return 1;
a4f31c00 3987
6fca22eb
RH
3988 /* Several x86 os'es need a frame pointer for other reasons,
3989 usually pertaining to setjmp. */
3990 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3991 return 1;
3992
3993 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3994 the frame pointer by default. Turn it back on now if we've not
3995 got a leaf function. */
a7943381
RH
3996 if (TARGET_OMIT_LEAF_FRAME_POINTER
3997 && (!current_function_is_leaf || current_function_profile))
6fca22eb
RH
3998 return 1;
3999
4000 return 0;
4001}
4002
4003/* Record that the current function accesses previous call frames. */
4004
4005void
4006ix86_setup_frame_addresses ()
4007{
4008 cfun->machine->accesses_prev_frame = 1;
4009}
e075ae69 4010\f
145aacc2
RH
/* USE_HIDDEN_LINKONCE is 1 when both HAVE_GAS_HIDDEN and
   SUPPORTS_ONE_ONLY are defined and 0 otherwise.  It selects how the
   pc thunks are named and emitted.  */
#if !defined(HAVE_GAS_HIDDEN) || !defined(SUPPORTS_ONE_ONLY)
# define USE_HIDDEN_LINKONCE 0
#else
# define USE_HIDDEN_LINKONCE 1
#endif

/* Bit mask indexed by hard register number: bit N is set once
   output_set_got has emitted a call to the pc thunk for register N,
   and ix86_asm_file_end emits a thunk for every bit that is set.  */
static int pic_labels_used;
e9a25f70 4018
145aacc2
RH
4019/* Fills in the label name that should be used for a pc thunk for
4020 the given register. */
4021
4022static void
4023get_pc_thunk_name (name, regno)
4024 char name[32];
4025 unsigned int regno;
4026{
4027 if (USE_HIDDEN_LINKONCE)
4028 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4029 else
4030 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4031}
4032
4033
e075ae69
RH
4034/* This function generates code for -fpic that loads %ebx with
4035 the return address of the caller and then returns. */
4036
4037void
4cf12e7e 4038ix86_asm_file_end (file)
e075ae69 4039 FILE *file;
e075ae69
RH
4040{
4041 rtx xops[2];
bd09bdeb 4042 int regno;
32b5b1aa 4043
bd09bdeb 4044 for (regno = 0; regno < 8; ++regno)
7c262518 4045 {
145aacc2
RH
4046 char name[32];
4047
bd09bdeb
RH
4048 if (! ((pic_labels_used >> regno) & 1))
4049 continue;
4050
145aacc2 4051 get_pc_thunk_name (name, regno);
bd09bdeb 4052
145aacc2
RH
4053 if (USE_HIDDEN_LINKONCE)
4054 {
4055 tree decl;
4056
4057 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4058 error_mark_node);
4059 TREE_PUBLIC (decl) = 1;
4060 TREE_STATIC (decl) = 1;
4061 DECL_ONE_ONLY (decl) = 1;
4062
4063 (*targetm.asm_out.unique_section) (decl, 0);
4064 named_section (decl, NULL, 0);
4065
5eb99654 4066 (*targetm.asm_out.globalize_label) (file, name);
145aacc2
RH
4067 fputs ("\t.hidden\t", file);
4068 assemble_name (file, name);
4069 fputc ('\n', file);
4070 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4071 }
4072 else
4073 {
4074 text_section ();
4075 ASM_OUTPUT_LABEL (file, name);
4076 }
bd09bdeb
RH
4077
4078 xops[0] = gen_rtx_REG (SImode, regno);
4079 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4080 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4081 output_asm_insn ("ret", xops);
7c262518 4082 }
32b5b1aa 4083}
32b5b1aa 4084
c8c03509 4085/* Emit code for the SET_GOT patterns. */
32b5b1aa 4086
c8c03509
RH
4087const char *
4088output_set_got (dest)
4089 rtx dest;
4090{
4091 rtx xops[3];
0d7d98ee 4092
c8c03509 4093 xops[0] = dest;
5fc0e5df 4094 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
32b5b1aa 4095
c8c03509 4096 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
32b5b1aa 4097 {
c8c03509
RH
4098 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4099
4100 if (!flag_pic)
4101 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4102 else
4103 output_asm_insn ("call\t%a2", xops);
4104
b069de3b
SS
4105#if TARGET_MACHO
4106 /* Output the "canonical" label name ("Lxx$pb") here too. This
4107 is what will be referred to by the Mach-O PIC subsystem. */
4108 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4109#endif
c8c03509
RH
4110 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4111 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4112
4113 if (flag_pic)
4114 output_asm_insn ("pop{l}\t%0", xops);
32b5b1aa 4115 }
e075ae69 4116 else
e5cb57e8 4117 {
145aacc2
RH
4118 char name[32];
4119 get_pc_thunk_name (name, REGNO (dest));
bd09bdeb 4120 pic_labels_used |= 1 << REGNO (dest);
f996902d 4121
145aacc2 4122 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
c8c03509
RH
4123 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4124 output_asm_insn ("call\t%X2", xops);
e5cb57e8 4125 }
e5cb57e8 4126
c8c03509
RH
4127 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4128 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
b069de3b 4129 else if (!TARGET_MACHO)
8e9fadc3 4130 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
79325812 4131
c8c03509 4132 return "";
e9a25f70 4133}
8dfe5673 4134
0d7d98ee 4135/* Generate an "push" pattern for input ARG. */
e9a25f70 4136
e075ae69
RH
4137static rtx
4138gen_push (arg)
4139 rtx arg;
e9a25f70 4140{
c5c76735 4141 return gen_rtx_SET (VOIDmode,
0d7d98ee
JH
4142 gen_rtx_MEM (Pmode,
4143 gen_rtx_PRE_DEC (Pmode,
c5c76735
JL
4144 stack_pointer_rtx)),
4145 arg);
e9a25f70
JL
4146}
4147
bd09bdeb
RH
4148/* Return >= 0 if there is an unused call-clobbered register available
4149 for the entire function. */
4150
4151static unsigned int
4152ix86_select_alt_pic_regnum ()
4153{
4154 if (current_function_is_leaf && !current_function_profile)
4155 {
4156 int i;
4157 for (i = 2; i >= 0; --i)
4158 if (!regs_ever_live[i])
4159 return i;
4160 }
4161
4162 return INVALID_REGNUM;
4163}
fce5a9f2 4164
4dd2ac2c
JH
4165/* Return 1 if we need to save REGNO. */
4166static int
1020a5ab 4167ix86_save_reg (regno, maybe_eh_return)
9b690711 4168 unsigned int regno;
37a58036 4169 int maybe_eh_return;
1020a5ab 4170{
bd09bdeb
RH
4171 if (pic_offset_table_rtx
4172 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4173 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
66edd3b4 4174 || current_function_profile
1020a5ab 4175 || current_function_calls_eh_return))
bd09bdeb
RH
4176 {
4177 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4178 return 0;
4179 return 1;
4180 }
1020a5ab
RH
4181
4182 if (current_function_calls_eh_return && maybe_eh_return)
4183 {
4184 unsigned i;
4185 for (i = 0; ; i++)
4186 {
b531087a 4187 unsigned test = EH_RETURN_DATA_REGNO (i);
1020a5ab
RH
4188 if (test == INVALID_REGNUM)
4189 break;
9b690711 4190 if (test == regno)
1020a5ab
RH
4191 return 1;
4192 }
4193 }
4dd2ac2c 4194
1020a5ab
RH
4195 return (regs_ever_live[regno]
4196 && !call_used_regs[regno]
4197 && !fixed_regs[regno]
4198 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4dd2ac2c
JH
4199}
4200
0903fcab
JH
4201/* Return number of registers to be saved on the stack. */
4202
4203static int
4204ix86_nsaved_regs ()
4205{
4206 int nregs = 0;
0903fcab
JH
4207 int regno;
4208
4dd2ac2c 4209 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4210 if (ix86_save_reg (regno, true))
4dd2ac2c 4211 nregs++;
0903fcab
JH
4212 return nregs;
4213}
4214
4215/* Return the offset between two registers, one to be eliminated, and the other
4216 its replacement, at the start of a routine. */
4217
4218HOST_WIDE_INT
4219ix86_initial_elimination_offset (from, to)
4220 int from;
4221 int to;
4222{
4dd2ac2c
JH
4223 struct ix86_frame frame;
4224 ix86_compute_frame_layout (&frame);
564d80f4
JH
4225
4226 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4227 return frame.hard_frame_pointer_offset;
564d80f4
JH
4228 else if (from == FRAME_POINTER_REGNUM
4229 && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4230 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
4231 else
4232 {
564d80f4
JH
4233 if (to != STACK_POINTER_REGNUM)
4234 abort ();
4235 else if (from == ARG_POINTER_REGNUM)
4dd2ac2c 4236 return frame.stack_pointer_offset;
564d80f4
JH
4237 else if (from != FRAME_POINTER_REGNUM)
4238 abort ();
0903fcab 4239 else
4dd2ac2c 4240 return frame.stack_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
4241 }
4242}
4243
4dd2ac2c 4244/* Fill structure ix86_frame about frame of currently computed function. */
0f290768 4245
4dd2ac2c
JH
4246static void
4247ix86_compute_frame_layout (frame)
4248 struct ix86_frame *frame;
65954bd8 4249{
65954bd8 4250 HOST_WIDE_INT total_size;
564d80f4 4251 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
44affdae
JH
4252 int offset;
4253 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4dd2ac2c 4254 HOST_WIDE_INT size = get_frame_size ();
65954bd8 4255
4dd2ac2c 4256 frame->nregs = ix86_nsaved_regs ();
564d80f4 4257 total_size = size;
65954bd8 4258
9ba81eaa 4259 /* Skip return address and saved base pointer. */
4dd2ac2c
JH
4260 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4261
4262 frame->hard_frame_pointer_offset = offset;
564d80f4 4263
fcbfaa65
RK
4264 /* Do some sanity checking of stack_alignment_needed and
4265 preferred_alignment, since i386 port is the only using those features
f710504c 4266 that may break easily. */
564d80f4 4267
44affdae
JH
4268 if (size && !stack_alignment_needed)
4269 abort ();
44affdae
JH
4270 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4271 abort ();
4272 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4273 abort ();
4274 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4275 abort ();
564d80f4 4276
4dd2ac2c
JH
4277 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4278 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
564d80f4 4279
4dd2ac2c
JH
4280 /* Register save area */
4281 offset += frame->nregs * UNITS_PER_WORD;
65954bd8 4282
8362f420
JH
4283 /* Va-arg area */
4284 if (ix86_save_varrargs_registers)
4285 {
4286 offset += X86_64_VARARGS_SIZE;
4287 frame->va_arg_size = X86_64_VARARGS_SIZE;
4288 }
4289 else
4290 frame->va_arg_size = 0;
4291
4dd2ac2c
JH
4292 /* Align start of frame for local function. */
4293 frame->padding1 = ((offset + stack_alignment_needed - 1)
4294 & -stack_alignment_needed) - offset;
f73ad30e 4295
4dd2ac2c 4296 offset += frame->padding1;
65954bd8 4297
4dd2ac2c
JH
4298 /* Frame pointer points here. */
4299 frame->frame_pointer_offset = offset;
54ff41b7 4300
4dd2ac2c 4301 offset += size;
65954bd8 4302
0b7ae565
RH
4303 /* Add outgoing arguments area. Can be skipped if we eliminated
4304 all the function calls as dead code. */
4305 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4dd2ac2c
JH
4306 {
4307 offset += current_function_outgoing_args_size;
4308 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4309 }
4310 else
4311 frame->outgoing_arguments_size = 0;
564d80f4 4312
002ff5bc
RH
4313 /* Align stack boundary. Only needed if we're calling another function
4314 or using alloca. */
4315 if (!current_function_is_leaf || current_function_calls_alloca)
0b7ae565
RH
4316 frame->padding2 = ((offset + preferred_alignment - 1)
4317 & -preferred_alignment) - offset;
4318 else
4319 frame->padding2 = 0;
4dd2ac2c
JH
4320
4321 offset += frame->padding2;
4322
4323 /* We've reached end of stack frame. */
4324 frame->stack_pointer_offset = offset;
4325
4326 /* Size prologue needs to allocate. */
4327 frame->to_allocate =
4328 (size + frame->padding1 + frame->padding2
8362f420 4329 + frame->outgoing_arguments_size + frame->va_arg_size);
4dd2ac2c 4330
8362f420
JH
4331 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4332 && current_function_is_leaf)
4333 {
4334 frame->red_zone_size = frame->to_allocate;
4335 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4336 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4337 }
4338 else
4339 frame->red_zone_size = 0;
4340 frame->to_allocate -= frame->red_zone_size;
4341 frame->stack_pointer_offset -= frame->red_zone_size;
4dd2ac2c
JH
4342#if 0
4343 fprintf (stderr, "nregs: %i\n", frame->nregs);
4344 fprintf (stderr, "size: %i\n", size);
4345 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4346 fprintf (stderr, "padding1: %i\n", frame->padding1);
8362f420 4347 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4dd2ac2c
JH
4348 fprintf (stderr, "padding2: %i\n", frame->padding2);
4349 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
8362f420 4350 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4dd2ac2c
JH
4351 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4352 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4353 frame->hard_frame_pointer_offset);
4354 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4355#endif
65954bd8
JL
4356}
4357
0903fcab
JH
4358/* Emit code to save registers in the prologue. */
4359
4360static void
4361ix86_emit_save_regs ()
4362{
4363 register int regno;
0903fcab 4364 rtx insn;
0903fcab 4365
4dd2ac2c 4366 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4367 if (ix86_save_reg (regno, true))
0903fcab 4368 {
0d7d98ee 4369 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
0903fcab
JH
4370 RTX_FRAME_RELATED_P (insn) = 1;
4371 }
4372}
4373
c6036a37
JH
4374/* Emit code to save registers using MOV insns. First register
4375 is restored from POINTER + OFFSET. */
4376static void
4377ix86_emit_save_regs_using_mov (pointer, offset)
b72f00af
RK
4378 rtx pointer;
4379 HOST_WIDE_INT offset;
c6036a37
JH
4380{
4381 int regno;
4382 rtx insn;
4383
4384 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4385 if (ix86_save_reg (regno, true))
4386 {
b72f00af
RK
4387 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4388 Pmode, offset),
c6036a37
JH
4389 gen_rtx_REG (Pmode, regno));
4390 RTX_FRAME_RELATED_P (insn) = 1;
4391 offset += UNITS_PER_WORD;
4392 }
4393}
4394
0f290768 4395/* Expand the prologue into a bunch of separate insns. */
e075ae69
RH
4396
4397void
4398ix86_expand_prologue ()
2a2ab3f9 4399{
564d80f4 4400 rtx insn;
bd09bdeb 4401 bool pic_reg_used;
4dd2ac2c 4402 struct ix86_frame frame;
6ab16dd9 4403 int use_mov = 0;
c6036a37 4404 HOST_WIDE_INT allocate;
4dd2ac2c 4405
2ab0437e 4406 if (!optimize_size)
6ab16dd9
JH
4407 {
4408 use_fast_prologue_epilogue
4409 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
2ab0437e
JH
4410 if (TARGET_PROLOGUE_USING_MOVE)
4411 use_mov = use_fast_prologue_epilogue;
6ab16dd9 4412 }
4dd2ac2c 4413 ix86_compute_frame_layout (&frame);
79325812 4414
e075ae69
RH
4415 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4416 slower on all targets. Also sdb doesn't like it. */
e9a25f70 4417
2a2ab3f9
JVA
4418 if (frame_pointer_needed)
4419 {
564d80f4 4420 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
e075ae69 4421 RTX_FRAME_RELATED_P (insn) = 1;
e9a25f70 4422
564d80f4 4423 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
e075ae69 4424 RTX_FRAME_RELATED_P (insn) = 1;
2a2ab3f9
JVA
4425 }
4426
c6036a37
JH
4427 allocate = frame.to_allocate;
4428 /* In case we are dealing only with single register and empty frame,
4429 push is equivalent of the mov+add sequence. */
4430 if (allocate == 0 && frame.nregs <= 1)
4431 use_mov = 0;
4432
4433 if (!use_mov)
4434 ix86_emit_save_regs ();
4435 else
4436 allocate += frame.nregs * UNITS_PER_WORD;
564d80f4 4437
c6036a37 4438 if (allocate == 0)
8dfe5673 4439 ;
e323735c 4440 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
469ac993 4441 {
f2042df3
RH
4442 insn = emit_insn (gen_pro_epilogue_adjust_stack
4443 (stack_pointer_rtx, stack_pointer_rtx,
e323735c 4444 GEN_INT (-allocate)));
e075ae69 4445 RTX_FRAME_RELATED_P (insn) = 1;
469ac993 4446 }
79325812 4447 else
8dfe5673 4448 {
e075ae69 4449 /* ??? Is this only valid for Win32? */
e9a25f70 4450
e075ae69 4451 rtx arg0, sym;
e9a25f70 4452
8362f420 4453 if (TARGET_64BIT)
b531087a 4454 abort ();
8362f420 4455
e075ae69 4456 arg0 = gen_rtx_REG (SImode, 0);
c6036a37 4457 emit_move_insn (arg0, GEN_INT (allocate));
77a989d1 4458
e075ae69
RH
4459 sym = gen_rtx_MEM (FUNCTION_MODE,
4460 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
32ee7d1d 4461 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
e075ae69
RH
4462
4463 CALL_INSN_FUNCTION_USAGE (insn)
276ab4a4
RH
4464 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4465 CALL_INSN_FUNCTION_USAGE (insn));
e075ae69 4466 }
c6036a37
JH
4467 if (use_mov)
4468 {
4469 if (!frame_pointer_needed || !frame.to_allocate)
4470 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4471 else
4472 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4473 -frame.nregs * UNITS_PER_WORD);
4474 }
e9a25f70 4475
84530511
SC
4476#ifdef SUBTARGET_PROLOGUE
4477 SUBTARGET_PROLOGUE;
0f290768 4478#endif
84530511 4479
bd09bdeb
RH
4480 pic_reg_used = false;
4481 if (pic_offset_table_rtx
4482 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4483 || current_function_profile))
4484 {
4485 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4486
4487 if (alt_pic_reg_used != INVALID_REGNUM)
4488 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4489
4490 pic_reg_used = true;
4491 }
4492
e9a25f70 4493 if (pic_reg_used)
c8c03509
RH
4494 {
4495 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4496
66edd3b4
RH
4497 /* Even with accurate pre-reload life analysis, we can wind up
4498 deleting all references to the pic register after reload.
4499 Consider if cross-jumping unifies two sides of a branch
4500 controled by a comparison vs the only read from a global.
4501 In which case, allow the set_got to be deleted, though we're
4502 too late to do anything about the ebx save in the prologue. */
c8c03509
RH
4503 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4504 }
77a989d1 4505
66edd3b4
RH
4506 /* Prevent function calls from be scheduled before the call to mcount.
4507 In the pic_reg_used case, make sure that the got load isn't deleted. */
4508 if (current_function_profile)
4509 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
77a989d1
SC
4510}
4511
da2d1d3a
JH
4512/* Emit code to restore saved registers using MOV insns. First register
4513 is restored from POINTER + OFFSET. */
4514static void
1020a5ab
RH
4515ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4516 rtx pointer;
4517 int offset;
37a58036 4518 int maybe_eh_return;
da2d1d3a
JH
4519{
4520 int regno;
da2d1d3a 4521
4dd2ac2c 4522 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 4523 if (ix86_save_reg (regno, maybe_eh_return))
da2d1d3a 4524 {
4dd2ac2c 4525 emit_move_insn (gen_rtx_REG (Pmode, regno),
b72f00af
RK
4526 adjust_address (gen_rtx_MEM (Pmode, pointer),
4527 Pmode, offset));
4dd2ac2c 4528 offset += UNITS_PER_WORD;
da2d1d3a
JH
4529 }
4530}
4531
0f290768 4532/* Restore function stack, frame, and registers. */
e9a25f70 4533
2a2ab3f9 4534void
1020a5ab
RH
4535ix86_expand_epilogue (style)
4536 int style;
2a2ab3f9 4537{
1c71e60e 4538 int regno;
fdb8a883 4539 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4dd2ac2c 4540 struct ix86_frame frame;
65954bd8 4541 HOST_WIDE_INT offset;
4dd2ac2c
JH
4542
4543 ix86_compute_frame_layout (&frame);
2a2ab3f9 4544
a4f31c00 4545 /* Calculate start of saved registers relative to ebp. Special care
84e306b4
RH
4546 must be taken for the normal return case of a function using
4547 eh_return: the eax and edx registers are marked as saved, but not
4548 restored along this path. */
4549 offset = frame.nregs;
4550 if (current_function_calls_eh_return && style != 2)
4551 offset -= 2;
4552 offset *= -UNITS_PER_WORD;
2a2ab3f9 4553
fdb8a883
JW
4554 /* If we're only restoring one register and sp is not valid then
4555 using a move instruction to restore the register since it's
0f290768 4556 less work than reloading sp and popping the register.
da2d1d3a
JH
4557
4558 The default code result in stack adjustment using add/lea instruction,
4559 while this code results in LEAVE instruction (or discrete equivalent),
4560 so it is profitable in some other cases as well. Especially when there
4561 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4562 and there is exactly one register to pop. This heruistic may need some
4563 tuning in future. */
4dd2ac2c 4564 if ((!sp_valid && frame.nregs <= 1)
2ab0437e 4565 || (TARGET_EPILOGUE_USING_MOVE
6ab16dd9 4566 && use_fast_prologue_epilogue
c6036a37 4567 && (frame.nregs > 1 || frame.to_allocate))
4dd2ac2c 4568 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
2ab0437e 4569 || (frame_pointer_needed && TARGET_USE_LEAVE
6ab16dd9 4570 && use_fast_prologue_epilogue && frame.nregs == 1)
2ab0437e 4571 || current_function_calls_eh_return)
2a2ab3f9 4572 {
da2d1d3a
JH
4573 /* Restore registers. We can use ebp or esp to address the memory
4574 locations. If both are available, default to ebp, since offsets
4575 are known to be small. Only exception is esp pointing directly to the
4576 end of block of saved registers, where we may simplify addressing
4577 mode. */
4578
4dd2ac2c 4579 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
1020a5ab
RH
4580 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4581 frame.to_allocate, style == 2);
da2d1d3a 4582 else
1020a5ab
RH
4583 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4584 offset, style == 2);
4585
4586 /* eh_return epilogues need %ecx added to the stack pointer. */
4587 if (style == 2)
4588 {
4589 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
2a2ab3f9 4590
1020a5ab
RH
4591 if (frame_pointer_needed)
4592 {
4593 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4594 tmp = plus_constant (tmp, UNITS_PER_WORD);
4595 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4596
4597 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4598 emit_move_insn (hard_frame_pointer_rtx, tmp);
4599
4600 emit_insn (gen_pro_epilogue_adjust_stack
f2042df3 4601 (stack_pointer_rtx, sa, const0_rtx));
1020a5ab
RH
4602 }
4603 else
4604 {
4605 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4606 tmp = plus_constant (tmp, (frame.to_allocate
4607 + frame.nregs * UNITS_PER_WORD));
4608 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4609 }
4610 }
4611 else if (!frame_pointer_needed)
f2042df3
RH
4612 emit_insn (gen_pro_epilogue_adjust_stack
4613 (stack_pointer_rtx, stack_pointer_rtx,
4614 GEN_INT (frame.to_allocate
4615 + frame.nregs * UNITS_PER_WORD)));
0f290768 4616 /* If not an i386, mov & pop is faster than "leave". */
6ab16dd9 4617 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
8362f420 4618 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
c8c5cb99 4619 else
2a2ab3f9 4620 {
1c71e60e
JH
4621 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4622 hard_frame_pointer_rtx,
f2042df3 4623 const0_rtx));
8362f420
JH
4624 if (TARGET_64BIT)
4625 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4626 else
4627 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
e9a25f70
JL
4628 }
4629 }
1c71e60e 4630 else
68f654ec 4631 {
1c71e60e
JH
4632 /* First step is to deallocate the stack frame so that we can
4633 pop the registers. */
4634 if (!sp_valid)
4635 {
4636 if (!frame_pointer_needed)
4637 abort ();
4638 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4639 hard_frame_pointer_rtx,
f2042df3 4640 GEN_INT (offset)));
1c71e60e 4641 }
4dd2ac2c 4642 else if (frame.to_allocate)
f2042df3
RH
4643 emit_insn (gen_pro_epilogue_adjust_stack
4644 (stack_pointer_rtx, stack_pointer_rtx,
4645 GEN_INT (frame.to_allocate)));
1c71e60e 4646
4dd2ac2c 4647 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 4648 if (ix86_save_reg (regno, false))
8362f420
JH
4649 {
4650 if (TARGET_64BIT)
4651 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4652 else
4653 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4654 }
4dd2ac2c 4655 if (frame_pointer_needed)
8362f420 4656 {
f5143c46 4657 /* Leave results in shorter dependency chains on CPUs that are
2ab0437e
JH
4658 able to grok it fast. */
4659 if (TARGET_USE_LEAVE)
4660 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4661 else if (TARGET_64BIT)
8362f420
JH
4662 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4663 else
4664 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4665 }
68f654ec 4666 }
68f654ec 4667
cbbf65e0 4668 /* Sibcall epilogues don't want a return instruction. */
1020a5ab 4669 if (style == 0)
cbbf65e0
RH
4670 return;
4671
2a2ab3f9
JVA
4672 if (current_function_pops_args && current_function_args_size)
4673 {
e075ae69 4674 rtx popc = GEN_INT (current_function_pops_args);
2a2ab3f9 4675
b8c752c8
UD
4676 /* i386 can only pop 64K bytes. If asked to pop more, pop
4677 return address, do explicit add, and jump indirectly to the
0f290768 4678 caller. */
2a2ab3f9 4679
b8c752c8 4680 if (current_function_pops_args >= 65536)
2a2ab3f9 4681 {
e075ae69 4682 rtx ecx = gen_rtx_REG (SImode, 2);
e9a25f70 4683
8362f420
JH
4684 /* There is no "pascal" calling convention in the 64bit ABI. */
4685 if (TARGET_64BIT)
b531087a 4686 abort ();
8362f420 4687
e075ae69
RH
4688 emit_insn (gen_popsi1 (ecx));
4689 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
11837777 4690 emit_jump_insn (gen_return_indirect_internal (ecx));
e9a25f70 4691 }
79325812 4692 else
e075ae69
RH
4693 emit_jump_insn (gen_return_pop_internal (popc));
4694 }
4695 else
4696 emit_jump_insn (gen_return_internal ());
4697}
bd09bdeb
RH
4698
4699/* Reset from the function's potential modifications. */
4700
4701static void
4702ix86_output_function_epilogue (file, size)
4703 FILE *file ATTRIBUTE_UNUSED;
4704 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4705{
4706 if (pic_offset_table_rtx)
4707 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4708}
e075ae69
RH
4709\f
4710/* Extract the parts of an RTL expression that is a valid memory address
b446e5a2
JH
4711 for an instruction. Return 0 if the structure of the address is
4712 grossly off. Return -1 if the address contains ASHIFT, so it is not
4713 strictly valid, but still used for computing length of lea instruction.
4714 */
e075ae69
RH
4715
4716static int
4717ix86_decompose_address (addr, out)
4718 register rtx addr;
4719 struct ix86_address *out;
4720{
4721 rtx base = NULL_RTX;
4722 rtx index = NULL_RTX;
4723 rtx disp = NULL_RTX;
4724 HOST_WIDE_INT scale = 1;
4725 rtx scale_rtx = NULL_RTX;
b446e5a2 4726 int retval = 1;
e075ae69 4727
1540f9eb 4728 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
e075ae69
RH
4729 base = addr;
4730 else if (GET_CODE (addr) == PLUS)
4731 {
4732 rtx op0 = XEXP (addr, 0);
4733 rtx op1 = XEXP (addr, 1);
4734 enum rtx_code code0 = GET_CODE (op0);
4735 enum rtx_code code1 = GET_CODE (op1);
4736
4737 if (code0 == REG || code0 == SUBREG)
4738 {
4739 if (code1 == REG || code1 == SUBREG)
4740 index = op0, base = op1; /* index + base */
4741 else
4742 base = op0, disp = op1; /* base + displacement */
4743 }
4744 else if (code0 == MULT)
e9a25f70 4745 {
e075ae69
RH
4746 index = XEXP (op0, 0);
4747 scale_rtx = XEXP (op0, 1);
4748 if (code1 == REG || code1 == SUBREG)
4749 base = op1; /* index*scale + base */
e9a25f70 4750 else
e075ae69
RH
4751 disp = op1; /* index*scale + disp */
4752 }
4753 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4754 {
4755 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4756 scale_rtx = XEXP (XEXP (op0, 0), 1);
4757 base = XEXP (op0, 1);
4758 disp = op1;
2a2ab3f9 4759 }
e075ae69
RH
4760 else if (code0 == PLUS)
4761 {
4762 index = XEXP (op0, 0); /* index + base + disp */
4763 base = XEXP (op0, 1);
4764 disp = op1;
4765 }
4766 else
b446e5a2 4767 return 0;
e075ae69
RH
4768 }
4769 else if (GET_CODE (addr) == MULT)
4770 {
4771 index = XEXP (addr, 0); /* index*scale */
4772 scale_rtx = XEXP (addr, 1);
4773 }
4774 else if (GET_CODE (addr) == ASHIFT)
4775 {
4776 rtx tmp;
4777
4778 /* We're called for lea too, which implements ashift on occasion. */
4779 index = XEXP (addr, 0);
4780 tmp = XEXP (addr, 1);
4781 if (GET_CODE (tmp) != CONST_INT)
b446e5a2 4782 return 0;
e075ae69
RH
4783 scale = INTVAL (tmp);
4784 if ((unsigned HOST_WIDE_INT) scale > 3)
b446e5a2 4785 return 0;
e075ae69 4786 scale = 1 << scale;
b446e5a2 4787 retval = -1;
2a2ab3f9 4788 }
2a2ab3f9 4789 else
e075ae69
RH
4790 disp = addr; /* displacement */
4791
4792 /* Extract the integral value of scale. */
4793 if (scale_rtx)
e9a25f70 4794 {
e075ae69 4795 if (GET_CODE (scale_rtx) != CONST_INT)
b446e5a2 4796 return 0;
e075ae69 4797 scale = INTVAL (scale_rtx);
e9a25f70 4798 }
3b3c6a3f 4799
e075ae69
RH
4800 /* Allow arg pointer and stack pointer as index if there is not scaling */
4801 if (base && index && scale == 1
564d80f4
JH
4802 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4803 || index == stack_pointer_rtx))
e075ae69
RH
4804 {
4805 rtx tmp = base;
4806 base = index;
4807 index = tmp;
4808 }
4809
4810 /* Special case: %ebp cannot be encoded as a base without a displacement. */
564d80f4
JH
4811 if ((base == hard_frame_pointer_rtx
4812 || base == frame_pointer_rtx
4813 || base == arg_pointer_rtx) && !disp)
e075ae69
RH
4814 disp = const0_rtx;
4815
4816 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4817 Avoid this by transforming to [%esi+0]. */
4818 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4819 && base && !index && !disp
329e1d01 4820 && REG_P (base)
e075ae69
RH
4821 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4822 disp = const0_rtx;
4823
4824 /* Special case: encode reg+reg instead of reg*2. */
4825 if (!base && index && scale && scale == 2)
4826 base = index, scale = 1;
0f290768 4827
e075ae69
RH
4828 /* Special case: scaling cannot be encoded without base or displacement. */
4829 if (!base && !disp && index && scale != 1)
4830 disp = const0_rtx;
4831
4832 out->base = base;
4833 out->index = index;
4834 out->disp = disp;
4835 out->scale = scale;
3b3c6a3f 4836
b446e5a2 4837 return retval;
e075ae69 4838}
01329426
JH
4839\f
4840/* Return cost of the memory address x.
4841 For i386, it is better to use a complex address than let gcc copy
4842 the address into a reg and make a new pseudo. But not if the address
4843 requires to two regs - that would mean more pseudos with longer
4844 lifetimes. */
4845int
4846ix86_address_cost (x)
4847 rtx x;
4848{
4849 struct ix86_address parts;
4850 int cost = 1;
3b3c6a3f 4851
01329426
JH
4852 if (!ix86_decompose_address (x, &parts))
4853 abort ();
4854
1540f9eb
JH
4855 if (parts.base && GET_CODE (parts.base) == SUBREG)
4856 parts.base = SUBREG_REG (parts.base);
4857 if (parts.index && GET_CODE (parts.index) == SUBREG)
4858 parts.index = SUBREG_REG (parts.index);
4859
01329426
JH
4860 /* More complex memory references are better. */
4861 if (parts.disp && parts.disp != const0_rtx)
4862 cost--;
4863
4864 /* Attempt to minimize number of registers in the address. */
4865 if ((parts.base
4866 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4867 || (parts.index
4868 && (!REG_P (parts.index)
4869 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4870 cost++;
4871
4872 if (parts.base
4873 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4874 && parts.index
4875 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4876 && parts.base != parts.index)
4877 cost++;
4878
4879 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
4880 since it's predecode logic can't detect the length of instructions
4881 and it degenerates to vector decoded. Increase cost of such
4882 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
0f290768 4883 to split such addresses or even refuse such addresses at all.
01329426
JH
4884
4885 Following addressing modes are affected:
4886 [base+scale*index]
4887 [scale*index+disp]
4888 [base+index]
0f290768 4889
01329426
JH
4890 The first and last case may be avoidable by explicitly coding the zero in
4891 memory address, but I don't have AMD-K6 machine handy to check this
4892 theory. */
4893
4894 if (TARGET_K6
4895 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4896 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4897 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4898 cost += 10;
0f290768 4899
01329426
JH
4900 return cost;
4901}
4902\f
b949ea8b
JW
4903/* If X is a machine specific address (i.e. a symbol or label being
4904 referenced as a displacement from the GOT implemented using an
4905 UNSPEC), then return the base term. Otherwise return X. */
4906
4907rtx
4908ix86_find_base_term (x)
4909 rtx x;
4910{
4911 rtx term;
4912
6eb791fc
JH
4913 if (TARGET_64BIT)
4914 {
4915 if (GET_CODE (x) != CONST)
4916 return x;
4917 term = XEXP (x, 0);
4918 if (GET_CODE (term) == PLUS
4919 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4920 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4921 term = XEXP (term, 0);
4922 if (GET_CODE (term) != UNSPEC
8ee41eaf 4923 || XINT (term, 1) != UNSPEC_GOTPCREL)
6eb791fc
JH
4924 return x;
4925
4926 term = XVECEXP (term, 0, 0);
4927
4928 if (GET_CODE (term) != SYMBOL_REF
4929 && GET_CODE (term) != LABEL_REF)
4930 return x;
4931
4932 return term;
4933 }
4934
b949ea8b
JW
4935 if (GET_CODE (x) != PLUS
4936 || XEXP (x, 0) != pic_offset_table_rtx
4937 || GET_CODE (XEXP (x, 1)) != CONST)
4938 return x;
4939
4940 term = XEXP (XEXP (x, 1), 0);
4941
4942 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4943 term = XEXP (term, 0);
4944
4945 if (GET_CODE (term) != UNSPEC
8ee41eaf 4946 || XINT (term, 1) != UNSPEC_GOTOFF)
b949ea8b
JW
4947 return x;
4948
4949 term = XVECEXP (term, 0, 0);
4950
4951 if (GET_CODE (term) != SYMBOL_REF
4952 && GET_CODE (term) != LABEL_REF)
4953 return x;
4954
4955 return term;
4956}
4957\f
f996902d
RH
4958/* Determine if a given RTX is a valid constant. We already know this
4959 satisfies CONSTANT_P. */
4960
4961bool
4962legitimate_constant_p (x)
4963 rtx x;
4964{
4965 rtx inner;
4966
4967 switch (GET_CODE (x))
4968 {
4969 case SYMBOL_REF:
4970 /* TLS symbols are not constant. */
4971 if (tls_symbolic_operand (x, Pmode))
4972 return false;
4973 break;
4974
4975 case CONST:
4976 inner = XEXP (x, 0);
4977
4978 /* Offsets of TLS symbols are never valid.
4979 Discourage CSE from creating them. */
4980 if (GET_CODE (inner) == PLUS
4981 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
4982 return false;
4983
4984 /* Only some unspecs are valid as "constants". */
4985 if (GET_CODE (inner) == UNSPEC)
4986 switch (XINT (inner, 1))
4987 {
4988 case UNSPEC_TPOFF:
4989 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
f996902d
RH
4990 default:
4991 return false;
4992 }
4993 break;
4994
4995 default:
4996 break;
4997 }
4998
4999 /* Otherwise we handle everything else in the move patterns. */
5000 return true;
5001}
5002
5003/* Determine if a given RTX is a valid constant address. */
5004
5005bool
5006constant_address_p (x)
5007 rtx x;
5008{
5009 switch (GET_CODE (x))
5010 {
5011 case LABEL_REF:
5012 case CONST_INT:
5013 return true;
5014
5015 case CONST_DOUBLE:
5016 return TARGET_64BIT;
5017
5018 case CONST:
b069de3b
SS
5019 /* For Mach-O, really believe the CONST. */
5020 if (TARGET_MACHO)
5021 return true;
5022 /* Otherwise fall through. */
f996902d
RH
5023 case SYMBOL_REF:
5024 return !flag_pic && legitimate_constant_p (x);
5025
5026 default:
5027 return false;
5028 }
5029}
5030
5031/* Nonzero if the constant value X is a legitimate general operand
fce5a9f2 5032 when generating PIC code. It is given that flag_pic is on and
f996902d
RH
5033 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5034
5035bool
5036legitimate_pic_operand_p (x)
5037 rtx x;
5038{
5039 rtx inner;
5040
5041 switch (GET_CODE (x))
5042 {
5043 case CONST:
5044 inner = XEXP (x, 0);
5045
5046 /* Only some unspecs are valid as "constants". */
5047 if (GET_CODE (inner) == UNSPEC)
5048 switch (XINT (inner, 1))
5049 {
5050 case UNSPEC_TPOFF:
5051 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
f996902d
RH
5052 default:
5053 return false;
5054 }
5055 /* FALLTHRU */
5056
5057 case SYMBOL_REF:
5058 case LABEL_REF:
5059 return legitimate_pic_address_disp_p (x);
5060
5061 default:
5062 return true;
5063 }
5064}
5065
e075ae69
RH
5066/* Determine if a given CONST RTX is a valid memory displacement
5067 in PIC mode. */
0f290768 5068
59be65f6 5069int
91bb873f
RH
5070legitimate_pic_address_disp_p (disp)
5071 register rtx disp;
5072{
f996902d
RH
5073 bool saw_plus;
5074
6eb791fc
JH
5075 /* In 64bit mode we can allow direct addresses of symbols and labels
5076 when they are not dynamic symbols. */
5077 if (TARGET_64BIT)
5078 {
5079 rtx x = disp;
5080 if (GET_CODE (disp) == CONST)
5081 x = XEXP (disp, 0);
5082 /* ??? Handle PIC code models */
5083 if (GET_CODE (x) == PLUS
5084 && (GET_CODE (XEXP (x, 1)) == CONST_INT
5085 && ix86_cmodel == CM_SMALL_PIC
5086 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
5087 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
5088 x = XEXP (x, 0);
5089 if (local_symbolic_operand (x, Pmode))
5090 return 1;
5091 }
91bb873f
RH
5092 if (GET_CODE (disp) != CONST)
5093 return 0;
5094 disp = XEXP (disp, 0);
5095
6eb791fc
JH
5096 if (TARGET_64BIT)
5097 {
5098 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5099 of GOT tables. We should not need these anyway. */
5100 if (GET_CODE (disp) != UNSPEC
8ee41eaf 5101 || XINT (disp, 1) != UNSPEC_GOTPCREL)
6eb791fc
JH
5102 return 0;
5103
5104 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5105 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5106 return 0;
5107 return 1;
5108 }
5109
f996902d 5110 saw_plus = false;
91bb873f
RH
5111 if (GET_CODE (disp) == PLUS)
5112 {
5113 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5114 return 0;
5115 disp = XEXP (disp, 0);
f996902d 5116 saw_plus = true;
91bb873f
RH
5117 }
5118
b069de3b
SS
5119 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5120 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5121 {
5122 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5123 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5124 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5125 {
5126 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5127 if (strstr (sym_name, "$pb") != 0)
5128 return 1;
5129 }
5130 }
5131
8ee41eaf 5132 if (GET_CODE (disp) != UNSPEC)
91bb873f
RH
5133 return 0;
5134
623fe810
RH
5135 switch (XINT (disp, 1))
5136 {
8ee41eaf 5137 case UNSPEC_GOT:
f996902d
RH
5138 if (saw_plus)
5139 return false;
623fe810 5140 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
8ee41eaf 5141 case UNSPEC_GOTOFF:
623fe810 5142 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
f996902d 5143 case UNSPEC_GOTTPOFF:
dea73790
JJ
5144 case UNSPEC_GOTNTPOFF:
5145 case UNSPEC_INDNTPOFF:
f996902d
RH
5146 if (saw_plus)
5147 return false;
5148 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5149 case UNSPEC_NTPOFF:
f996902d
RH
5150 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5151 case UNSPEC_DTPOFF:
f996902d 5152 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
623fe810 5153 }
fce5a9f2 5154
623fe810 5155 return 0;
91bb873f
RH
5156}
5157
e075ae69
RH
5158/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5159 memory address for an instruction. The MODE argument is the machine mode
5160 for the MEM expression that wants to use this address.
5161
5162 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5163 convert common non-canonical forms to canonical form so that they will
5164 be recognized. */
5165
3b3c6a3f
MM
5166int
5167legitimate_address_p (mode, addr, strict)
5168 enum machine_mode mode;
5169 register rtx addr;
5170 int strict;
5171{
e075ae69
RH
5172 struct ix86_address parts;
5173 rtx base, index, disp;
5174 HOST_WIDE_INT scale;
5175 const char *reason = NULL;
5176 rtx reason_rtx = NULL_RTX;
3b3c6a3f
MM
5177
5178 if (TARGET_DEBUG_ADDR)
5179 {
5180 fprintf (stderr,
e9a25f70 5181 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
3b3c6a3f 5182 GET_MODE_NAME (mode), strict);
3b3c6a3f
MM
5183 debug_rtx (addr);
5184 }
5185
9e20be0c
JJ
5186 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5187 {
5188 if (TARGET_DEBUG_ADDR)
5189 fprintf (stderr, "Success.\n");
5190 return TRUE;
5191 }
5192
b446e5a2 5193 if (ix86_decompose_address (addr, &parts) <= 0)
3b3c6a3f 5194 {
e075ae69 5195 reason = "decomposition failed";
50e60bc3 5196 goto report_error;
3b3c6a3f
MM
5197 }
5198
e075ae69
RH
5199 base = parts.base;
5200 index = parts.index;
5201 disp = parts.disp;
5202 scale = parts.scale;
91f0226f 5203
e075ae69 5204 /* Validate base register.
e9a25f70
JL
5205
5206 Don't allow SUBREG's here, it can lead to spill failures when the base
3d771dfd
MM
5207 is one word out of a two word structure, which is represented internally
5208 as a DImode int. */
e9a25f70 5209
3b3c6a3f
MM
5210 if (base)
5211 {
1540f9eb 5212 rtx reg;
e075ae69
RH
5213 reason_rtx = base;
5214
1540f9eb
JH
5215 if (GET_CODE (base) == SUBREG)
5216 reg = SUBREG_REG (base);
5217 else
5218 reg = base;
5219
5220 if (GET_CODE (reg) != REG)
3b3c6a3f 5221 {
e075ae69 5222 reason = "base is not a register";
50e60bc3 5223 goto report_error;
3b3c6a3f
MM
5224 }
5225
c954bd01
RH
5226 if (GET_MODE (base) != Pmode)
5227 {
e075ae69 5228 reason = "base is not in Pmode";
50e60bc3 5229 goto report_error;
c954bd01
RH
5230 }
5231
1540f9eb
JH
5232 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5233 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
3b3c6a3f 5234 {
e075ae69 5235 reason = "base is not valid";
50e60bc3 5236 goto report_error;
3b3c6a3f
MM
5237 }
5238 }
5239
e075ae69 5240 /* Validate index register.
e9a25f70
JL
5241
5242 Don't allow SUBREG's here, it can lead to spill failures when the index
3d771dfd
MM
5243 is one word out of a two word structure, which is represented internally
5244 as a DImode int. */
e075ae69
RH
5245
5246 if (index)
3b3c6a3f 5247 {
1540f9eb 5248 rtx reg;
e075ae69
RH
5249 reason_rtx = index;
5250
1540f9eb
JH
5251 if (GET_CODE (index) == SUBREG)
5252 reg = SUBREG_REG (index);
5253 else
5254 reg = index;
5255
5256 if (GET_CODE (reg) != REG)
3b3c6a3f 5257 {
e075ae69 5258 reason = "index is not a register";
50e60bc3 5259 goto report_error;
3b3c6a3f
MM
5260 }
5261
e075ae69 5262 if (GET_MODE (index) != Pmode)
c954bd01 5263 {
e075ae69 5264 reason = "index is not in Pmode";
50e60bc3 5265 goto report_error;
c954bd01
RH
5266 }
5267
1540f9eb
JH
5268 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5269 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
3b3c6a3f 5270 {
e075ae69 5271 reason = "index is not valid";
50e60bc3 5272 goto report_error;
3b3c6a3f
MM
5273 }
5274 }
3b3c6a3f 5275
e075ae69
RH
5276 /* Validate scale factor. */
5277 if (scale != 1)
3b3c6a3f 5278 {
e075ae69
RH
5279 reason_rtx = GEN_INT (scale);
5280 if (!index)
3b3c6a3f 5281 {
e075ae69 5282 reason = "scale without index";
50e60bc3 5283 goto report_error;
3b3c6a3f
MM
5284 }
5285
e075ae69 5286 if (scale != 2 && scale != 4 && scale != 8)
3b3c6a3f 5287 {
e075ae69 5288 reason = "scale is not a valid multiplier";
50e60bc3 5289 goto report_error;
3b3c6a3f
MM
5290 }
5291 }
5292
91bb873f 5293 /* Validate displacement. */
3b3c6a3f
MM
5294 if (disp)
5295 {
e075ae69
RH
5296 reason_rtx = disp;
5297
0d7d98ee 5298 if (TARGET_64BIT)
3b3c6a3f 5299 {
0d7d98ee
JH
5300 if (!x86_64_sign_extended_value (disp))
5301 {
5302 reason = "displacement is out of range";
5303 goto report_error;
5304 }
5305 }
5306 else
5307 {
5308 if (GET_CODE (disp) == CONST_DOUBLE)
5309 {
5310 reason = "displacement is a const_double";
5311 goto report_error;
5312 }
3b3c6a3f
MM
5313 }
5314
f996902d
RH
5315 if (GET_CODE (disp) == CONST
5316 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5317 switch (XINT (XEXP (disp, 0), 1))
5318 {
5319 case UNSPEC_GOT:
5320 case UNSPEC_GOTOFF:
5321 case UNSPEC_GOTPCREL:
5322 if (!flag_pic)
5323 abort ();
5324 goto is_legitimate_pic;
5325
5326 case UNSPEC_GOTTPOFF:
dea73790
JJ
5327 case UNSPEC_GOTNTPOFF:
5328 case UNSPEC_INDNTPOFF:
f996902d
RH
5329 case UNSPEC_NTPOFF:
5330 case UNSPEC_DTPOFF:
5331 break;
5332
5333 default:
5334 reason = "invalid address unspec";
5335 goto report_error;
5336 }
5337
b069de3b
SS
5338 else if (flag_pic && (SYMBOLIC_CONST (disp)
5339#if TARGET_MACHO
5340 && !machopic_operand_p (disp)
5341#endif
5342 ))
3b3c6a3f 5343 {
f996902d 5344 is_legitimate_pic:
0d7d98ee
JH
5345 if (TARGET_64BIT && (index || base))
5346 {
5347 reason = "non-constant pic memory reference";
5348 goto report_error;
5349 }
91bb873f
RH
5350 if (! legitimate_pic_address_disp_p (disp))
5351 {
e075ae69 5352 reason = "displacement is an invalid pic construct";
50e60bc3 5353 goto report_error;
91bb873f
RH
5354 }
5355
4e9efe54 5356 /* This code used to verify that a symbolic pic displacement
0f290768
KH
5357 includes the pic_offset_table_rtx register.
5358
4e9efe54
JH
5359 While this is good idea, unfortunately these constructs may
5360 be created by "adds using lea" optimization for incorrect
5361 code like:
5362
5363 int a;
5364 int foo(int i)
5365 {
5366 return *(&a+i);
5367 }
5368
50e60bc3 5369 This code is nonsensical, but results in addressing
4e9efe54 5370 GOT table with pic_offset_table_rtx base. We can't
f710504c 5371 just refuse it easily, since it gets matched by
4e9efe54
JH
5372 "addsi3" pattern, that later gets split to lea in the
5373 case output register differs from input. While this
5374 can be handled by separate addsi pattern for this case
5375 that never results in lea, this seems to be easier and
5376 correct fix for crash to disable this test. */
3b3c6a3f 5377 }
f996902d
RH
5378 else if (!CONSTANT_ADDRESS_P (disp))
5379 {
5380 reason = "displacement is not constant";
5381 goto report_error;
5382 }
3b3c6a3f
MM
5383 }
5384
e075ae69 5385 /* Everything looks valid. */
3b3c6a3f 5386 if (TARGET_DEBUG_ADDR)
e075ae69 5387 fprintf (stderr, "Success.\n");
3b3c6a3f 5388 return TRUE;
e075ae69 5389
5bf0ebab 5390 report_error:
e075ae69
RH
5391 if (TARGET_DEBUG_ADDR)
5392 {
5393 fprintf (stderr, "Error: %s\n", reason);
5394 debug_rtx (reason_rtx);
5395 }
5396 return FALSE;
3b3c6a3f 5397}
3b3c6a3f 5398\f
55efb413
JW
5399/* Return an unique alias set for the GOT. */
5400
0f290768 5401static HOST_WIDE_INT
55efb413
JW
5402ix86_GOT_alias_set ()
5403{
5bf0ebab
RH
5404 static HOST_WIDE_INT set = -1;
5405 if (set == -1)
5406 set = new_alias_set ();
5407 return set;
0f290768 5408}
55efb413 5409
3b3c6a3f
MM
5410/* Return a legitimate reference for ORIG (an address) using the
5411 register REG. If REG is 0, a new pseudo is generated.
5412
91bb873f 5413 There are two types of references that must be handled:
3b3c6a3f
MM
5414
5415 1. Global data references must load the address from the GOT, via
5416 the PIC reg. An insn is emitted to do this load, and the reg is
5417 returned.
5418
91bb873f
RH
5419 2. Static data references, constant pool addresses, and code labels
5420 compute the address as an offset from the GOT, whose base is in
5421 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5422 differentiate them from global data objects. The returned
5423 address is the PIC reg + an unspec constant.
3b3c6a3f
MM
5424
5425 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
91bb873f 5426 reg also appears in the address. */
3b3c6a3f
MM
5427
5428rtx
5429legitimize_pic_address (orig, reg)
5430 rtx orig;
5431 rtx reg;
5432{
5433 rtx addr = orig;
5434 rtx new = orig;
91bb873f 5435 rtx base;
3b3c6a3f 5436
b069de3b
SS
5437#if TARGET_MACHO
5438 if (reg == 0)
5439 reg = gen_reg_rtx (Pmode);
5440 /* Use the generic Mach-O PIC machinery. */
5441 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5442#endif
5443
623fe810 5444 if (local_symbolic_operand (addr, Pmode))
3b3c6a3f 5445 {
14f73b5a
JH
5446 /* In 64bit mode we can address such objects directly. */
5447 if (TARGET_64BIT)
5448 new = addr;
5449 else
5450 {
5451 /* This symbol may be referenced via a displacement from the PIC
5452 base address (@GOTOFF). */
3b3c6a3f 5453
66edd3b4
RH
5454 if (reload_in_progress)
5455 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
8ee41eaf 5456 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
14f73b5a
JH
5457 new = gen_rtx_CONST (Pmode, new);
5458 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3b3c6a3f 5459
14f73b5a
JH
5460 if (reg != 0)
5461 {
5462 emit_move_insn (reg, new);
5463 new = reg;
5464 }
5465 }
3b3c6a3f 5466 }
91bb873f 5467 else if (GET_CODE (addr) == SYMBOL_REF)
3b3c6a3f 5468 {
14f73b5a
JH
5469 if (TARGET_64BIT)
5470 {
8ee41eaf 5471 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
14f73b5a
JH
5472 new = gen_rtx_CONST (Pmode, new);
5473 new = gen_rtx_MEM (Pmode, new);
5474 RTX_UNCHANGING_P (new) = 1;
5475 set_mem_alias_set (new, ix86_GOT_alias_set ());
5476
5477 if (reg == 0)
5478 reg = gen_reg_rtx (Pmode);
5479 /* Use directly gen_movsi, otherwise the address is loaded
5480 into register for CSE. We don't want to CSE this addresses,
5481 instead we CSE addresses from the GOT table, so skip this. */
5482 emit_insn (gen_movsi (reg, new));
5483 new = reg;
5484 }
5485 else
5486 {
5487 /* This symbol must be referenced via a load from the
5488 Global Offset Table (@GOT). */
3b3c6a3f 5489
66edd3b4
RH
5490 if (reload_in_progress)
5491 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
8ee41eaf 5492 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
14f73b5a
JH
5493 new = gen_rtx_CONST (Pmode, new);
5494 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5495 new = gen_rtx_MEM (Pmode, new);
5496 RTX_UNCHANGING_P (new) = 1;
5497 set_mem_alias_set (new, ix86_GOT_alias_set ());
3b3c6a3f 5498
14f73b5a
JH
5499 if (reg == 0)
5500 reg = gen_reg_rtx (Pmode);
5501 emit_move_insn (reg, new);
5502 new = reg;
5503 }
0f290768 5504 }
91bb873f
RH
5505 else
5506 {
5507 if (GET_CODE (addr) == CONST)
3b3c6a3f 5508 {
91bb873f 5509 addr = XEXP (addr, 0);
e3c8ea67
RH
5510
5511 /* We must match stuff we generate before. Assume the only
5512 unspecs that can get here are ours. Not that we could do
5513 anything with them anyway... */
5514 if (GET_CODE (addr) == UNSPEC
5515 || (GET_CODE (addr) == PLUS
5516 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5517 return orig;
5518 if (GET_CODE (addr) != PLUS)
564d80f4 5519 abort ();
3b3c6a3f 5520 }
91bb873f
RH
5521 if (GET_CODE (addr) == PLUS)
5522 {
5523 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
e9a25f70 5524
91bb873f
RH
5525 /* Check first to see if this is a constant offset from a @GOTOFF
5526 symbol reference. */
623fe810 5527 if (local_symbolic_operand (op0, Pmode)
91bb873f
RH
5528 && GET_CODE (op1) == CONST_INT)
5529 {
6eb791fc
JH
5530 if (!TARGET_64BIT)
5531 {
66edd3b4
RH
5532 if (reload_in_progress)
5533 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
8ee41eaf
RH
5534 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5535 UNSPEC_GOTOFF);
6eb791fc
JH
5536 new = gen_rtx_PLUS (Pmode, new, op1);
5537 new = gen_rtx_CONST (Pmode, new);
5538 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
91bb873f 5539
6eb791fc
JH
5540 if (reg != 0)
5541 {
5542 emit_move_insn (reg, new);
5543 new = reg;
5544 }
5545 }
5546 else
91bb873f 5547 {
6eb791fc 5548 /* ??? We need to limit offsets here. */
91bb873f
RH
5549 }
5550 }
5551 else
5552 {
5553 base = legitimize_pic_address (XEXP (addr, 0), reg);
5554 new = legitimize_pic_address (XEXP (addr, 1),
5555 base == reg ? NULL_RTX : reg);
5556
5557 if (GET_CODE (new) == CONST_INT)
5558 new = plus_constant (base, INTVAL (new));
5559 else
5560 {
5561 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5562 {
5563 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5564 new = XEXP (new, 1);
5565 }
5566 new = gen_rtx_PLUS (Pmode, base, new);
5567 }
5568 }
5569 }
3b3c6a3f
MM
5570 }
5571 return new;
5572}
fb49053f 5573
fb49053f 5574static void
f996902d 5575ix86_encode_section_info (decl, first)
fb49053f
RH
5576 tree decl;
5577 int first ATTRIBUTE_UNUSED;
5578{
f996902d
RH
5579 bool local_p = (*targetm.binds_local_p) (decl);
5580 rtx rtl, symbol;
5581
5582 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5583 if (GET_CODE (rtl) != MEM)
5584 return;
5585 symbol = XEXP (rtl, 0);
5586 if (GET_CODE (symbol) != SYMBOL_REF)
5587 return;
5588
5589 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5590 symbol so that we may access it directly in the GOT. */
5591
fb49053f 5592 if (flag_pic)
f996902d
RH
5593 SYMBOL_REF_FLAG (symbol) = local_p;
5594
5595 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5596 "local dynamic", "initial exec" or "local exec" TLS models
5597 respectively. */
5598
5599 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
fb49053f 5600 {
f996902d
RH
5601 const char *symbol_str;
5602 char *newstr;
5603 size_t len;
dce81a1a 5604 enum tls_model kind = decl_tls_model (decl);
f996902d
RH
5605
5606 symbol_str = XSTR (symbol, 0);
fb49053f 5607
f996902d
RH
5608 if (symbol_str[0] == '%')
5609 {
5610 if (symbol_str[1] == tls_model_chars[kind])
5611 return;
5612 symbol_str += 2;
5613 }
5614 len = strlen (symbol_str) + 1;
5615 newstr = alloca (len + 2);
5616
5617 newstr[0] = '%';
5618 newstr[1] = tls_model_chars[kind];
5619 memcpy (newstr + 2, symbol_str, len);
5620
5621 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
fb49053f
RH
5622 }
5623}
f996902d
RH
5624
/* Undo the name encoding done by ix86_encode_section_info when
   printing symbol names.

   STR is a symbol name.  A leading '%' together with the TLS model
   character that follows it is skipped, and then a single leading
   '*' is skipped.  Returns a pointer into STR; nothing is copied or
   modified.

   The model character is only skipped if it is actually present, so
   that a name consisting of just "%" does not make us step over the
   terminating NUL and read beyond the end of the string.  */

static const char *
ix86_strip_name_encoding (str)
     const char *str;
{
  if (str[0] == '%')
    {
      str += 1;
      if (str[0] != '\0')
        str += 1;
    }
  if (str[0] == '*')
    str += 1;
  return str;
}
3b3c6a3f 5637\f
f996902d
RH
5638/* Load the thread pointer into a register. */
5639
5640static rtx
5641get_thread_pointer ()
5642{
5643 rtx tp;
5644
5645 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
9e20be0c
JJ
5646 tp = gen_rtx_MEM (Pmode, tp);
5647 RTX_UNCHANGING_P (tp) = 1;
5648 set_mem_alias_set (tp, ix86_GOT_alias_set ());
f996902d
RH
5649 tp = force_reg (Pmode, tp);
5650
5651 return tp;
5652}
fce5a9f2 5653
3b3c6a3f
MM
5654/* Try machine-dependent ways of modifying an illegitimate address
5655 to be legitimate. If we find one, return the new, valid address.
5656 This macro is used in only one place: `memory_address' in explow.c.
5657
5658 OLDX is the address as it was before break_out_memory_refs was called.
5659 In some cases it is useful to look at this to decide what needs to be done.
5660
5661 MODE and WIN are passed so that this macro can use
5662 GO_IF_LEGITIMATE_ADDRESS.
5663
5664 It is always safe for this macro to do nothing. It exists to recognize
5665 opportunities to optimize the output.
5666
5667 For the 80386, we handle X+REG by loading X into a register R and
5668 using R+REG. R will go in a general reg and indexing will be used.
5669 However, if REG is a broken-out memory address or multiplication,
5670 nothing needs to be done because REG can certainly go in a general reg.
5671
5672 When -fpic is used, special handling is needed for symbolic references.
5673 See comments by legitimize_pic_address in i386.c for details. */
5674
5675rtx
5676legitimize_address (x, oldx, mode)
5677 register rtx x;
bb5177ac 5678 register rtx oldx ATTRIBUTE_UNUSED;
3b3c6a3f
MM
5679 enum machine_mode mode;
5680{
5681 int changed = 0;
5682 unsigned log;
5683
5684 if (TARGET_DEBUG_ADDR)
5685 {
e9a25f70
JL
5686 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5687 GET_MODE_NAME (mode));
3b3c6a3f
MM
5688 debug_rtx (x);
5689 }
5690
f996902d
RH
5691 log = tls_symbolic_operand (x, mode);
5692 if (log)
5693 {
5694 rtx dest, base, off, pic;
5695
755ac5d4 5696 switch (log)
f996902d
RH
5697 {
5698 case TLS_MODEL_GLOBAL_DYNAMIC:
5699 dest = gen_reg_rtx (Pmode);
5700 emit_insn (gen_tls_global_dynamic (dest, x));
5701 break;
5702
5703 case TLS_MODEL_LOCAL_DYNAMIC:
5704 base = gen_reg_rtx (Pmode);
5705 emit_insn (gen_tls_local_dynamic_base (base));
5706
5707 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5708 off = gen_rtx_CONST (Pmode, off);
5709
5710 return gen_rtx_PLUS (Pmode, base, off);
5711
5712 case TLS_MODEL_INITIAL_EXEC:
5713 if (flag_pic)
5714 {
66edd3b4
RH
5715 if (reload_in_progress)
5716 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
f996902d
RH
5717 pic = pic_offset_table_rtx;
5718 }
dea73790 5719 else if (!TARGET_GNU_TLS)
f996902d
RH
5720 {
5721 pic = gen_reg_rtx (Pmode);
5722 emit_insn (gen_set_got (pic));
5723 }
dea73790
JJ
5724 else
5725 pic = NULL;
f996902d
RH
5726
5727 base = get_thread_pointer ();
5728
dea73790
JJ
5729 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5730 !TARGET_GNU_TLS
5731 ? UNSPEC_GOTTPOFF
5732 : flag_pic ? UNSPEC_GOTNTPOFF
5733 : UNSPEC_INDNTPOFF);
f996902d 5734 off = gen_rtx_CONST (Pmode, off);
dea73790
JJ
5735 if (flag_pic || !TARGET_GNU_TLS)
5736 off = gen_rtx_PLUS (Pmode, pic, off);
f996902d
RH
5737 off = gen_rtx_MEM (Pmode, off);
5738 RTX_UNCHANGING_P (off) = 1;
5739 set_mem_alias_set (off, ix86_GOT_alias_set ());
f996902d 5740 dest = gen_reg_rtx (Pmode);
dea73790
JJ
5741
5742 if (TARGET_GNU_TLS)
5743 {
5744 emit_move_insn (dest, off);
5745 return gen_rtx_PLUS (Pmode, base, dest);
5746 }
5747 else
5748 emit_insn (gen_subsi3 (dest, base, off));
f996902d
RH
5749 break;
5750
5751 case TLS_MODEL_LOCAL_EXEC:
5752 base = get_thread_pointer ();
5753
5754 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5755 TARGET_GNU_TLS ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5756 off = gen_rtx_CONST (Pmode, off);
5757
5758 if (TARGET_GNU_TLS)
5759 return gen_rtx_PLUS (Pmode, base, off);
5760 else
5761 {
5762 dest = gen_reg_rtx (Pmode);
5763 emit_insn (gen_subsi3 (dest, base, off));
5764 }
5765 break;
5766
5767 default:
5768 abort ();
5769 }
5770
5771 return dest;
5772 }
5773
3b3c6a3f
MM
5774 if (flag_pic && SYMBOLIC_CONST (x))
5775 return legitimize_pic_address (x, 0);
5776
5777 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5778 if (GET_CODE (x) == ASHIFT
5779 && GET_CODE (XEXP (x, 1)) == CONST_INT
b531087a 5780 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
3b3c6a3f
MM
5781 {
5782 changed = 1;
a269a03c
JC
5783 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5784 GEN_INT (1 << log));
3b3c6a3f
MM
5785 }
5786
5787 if (GET_CODE (x) == PLUS)
5788 {
0f290768 5789 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
e9a25f70 5790
3b3c6a3f
MM
5791 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5792 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
b531087a 5793 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
3b3c6a3f
MM
5794 {
5795 changed = 1;
c5c76735
JL
5796 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5797 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5798 GEN_INT (1 << log));
3b3c6a3f
MM
5799 }
5800
5801 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5802 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
b531087a 5803 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
3b3c6a3f
MM
5804 {
5805 changed = 1;
c5c76735
JL
5806 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5807 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5808 GEN_INT (1 << log));
3b3c6a3f
MM
5809 }
5810
0f290768 5811 /* Put multiply first if it isn't already. */
3b3c6a3f
MM
5812 if (GET_CODE (XEXP (x, 1)) == MULT)
5813 {
5814 rtx tmp = XEXP (x, 0);
5815 XEXP (x, 0) = XEXP (x, 1);
5816 XEXP (x, 1) = tmp;
5817 changed = 1;
5818 }
5819
5820 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5821 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5822 created by virtual register instantiation, register elimination, and
5823 similar optimizations. */
5824 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5825 {
5826 changed = 1;
c5c76735
JL
5827 x = gen_rtx_PLUS (Pmode,
5828 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5829 XEXP (XEXP (x, 1), 0)),
5830 XEXP (XEXP (x, 1), 1));
3b3c6a3f
MM
5831 }
5832
e9a25f70
JL
5833 /* Canonicalize
5834 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
3b3c6a3f
MM
5835 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5836 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5837 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5838 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5839 && CONSTANT_P (XEXP (x, 1)))
5840 {
00c79232
ML
5841 rtx constant;
5842 rtx other = NULL_RTX;
3b3c6a3f
MM
5843
5844 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5845 {
5846 constant = XEXP (x, 1);
5847 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5848 }
5849 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5850 {
5851 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5852 other = XEXP (x, 1);
5853 }
5854 else
5855 constant = 0;
5856
5857 if (constant)
5858 {
5859 changed = 1;
c5c76735
JL
5860 x = gen_rtx_PLUS (Pmode,
5861 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5862 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5863 plus_constant (other, INTVAL (constant)));
3b3c6a3f
MM
5864 }
5865 }
5866
5867 if (changed && legitimate_address_p (mode, x, FALSE))
5868 return x;
5869
5870 if (GET_CODE (XEXP (x, 0)) == MULT)
5871 {
5872 changed = 1;
5873 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5874 }
5875
5876 if (GET_CODE (XEXP (x, 1)) == MULT)
5877 {
5878 changed = 1;
5879 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5880 }
5881
5882 if (changed
5883 && GET_CODE (XEXP (x, 1)) == REG
5884 && GET_CODE (XEXP (x, 0)) == REG)
5885 return x;
5886
5887 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5888 {
5889 changed = 1;
5890 x = legitimize_pic_address (x, 0);
5891 }
5892
5893 if (changed && legitimate_address_p (mode, x, FALSE))
5894 return x;
5895
5896 if (GET_CODE (XEXP (x, 0)) == REG)
5897 {
5898 register rtx temp = gen_reg_rtx (Pmode);
5899 register rtx val = force_operand (XEXP (x, 1), temp);
5900 if (val != temp)
5901 emit_move_insn (temp, val);
5902
5903 XEXP (x, 1) = temp;
5904 return x;
5905 }
5906
5907 else if (GET_CODE (XEXP (x, 1)) == REG)
5908 {
5909 register rtx temp = gen_reg_rtx (Pmode);
5910 register rtx val = force_operand (XEXP (x, 0), temp);
5911 if (val != temp)
5912 emit_move_insn (temp, val);
5913
5914 XEXP (x, 0) = temp;
5915 return x;
5916 }
5917 }
5918
5919 return x;
5920}
2a2ab3f9
JVA
5921\f
5922/* Print an integer constant expression in assembler syntax. Addition
5923 and subtraction are the only arithmetic that may appear in these
5924 expressions. FILE is the stdio stream to write to, X is the rtx, and
5925 CODE is the operand print code from the output string. */
5926
5927static void
5928output_pic_addr_const (file, x, code)
5929 FILE *file;
5930 rtx x;
5931 int code;
5932{
5933 char buf[256];
5934
5935 switch (GET_CODE (x))
5936 {
5937 case PC:
5938 if (flag_pic)
5939 putc ('.', file);
5940 else
5941 abort ();
5942 break;
5943
5944 case SYMBOL_REF:
91bb873f 5945 assemble_name (file, XSTR (x, 0));
b069de3b 5946 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
91bb873f 5947 fputs ("@PLT", file);
2a2ab3f9
JVA
5948 break;
5949
91bb873f
RH
5950 case LABEL_REF:
5951 x = XEXP (x, 0);
5952 /* FALLTHRU */
2a2ab3f9
JVA
5953 case CODE_LABEL:
5954 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5955 assemble_name (asm_out_file, buf);
5956 break;
5957
5958 case CONST_INT:
f64cecad 5959 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2a2ab3f9
JVA
5960 break;
5961
5962 case CONST:
5963 /* This used to output parentheses around the expression,
5964 but that does not work on the 386 (either ATT or BSD assembler). */
5965 output_pic_addr_const (file, XEXP (x, 0), code);
5966 break;
5967
5968 case CONST_DOUBLE:
5969 if (GET_MODE (x) == VOIDmode)
5970 {
5971 /* We can use %d if the number is <32 bits and positive. */
5972 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
f64cecad
JC
5973 fprintf (file, "0x%lx%08lx",
5974 (unsigned long) CONST_DOUBLE_HIGH (x),
5975 (unsigned long) CONST_DOUBLE_LOW (x));
2a2ab3f9 5976 else
f64cecad 5977 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2a2ab3f9
JVA
5978 }
5979 else
5980 /* We can't handle floating point constants;
5981 PRINT_OPERAND must handle them. */
5982 output_operand_lossage ("floating constant misused");
5983 break;
5984
5985 case PLUS:
e9a25f70 5986 /* Some assemblers need integer constants to appear first. */
2a2ab3f9
JVA
5987 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5988 {
2a2ab3f9 5989 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 5990 putc ('+', file);
e9a25f70 5991 output_pic_addr_const (file, XEXP (x, 1), code);
2a2ab3f9 5992 }
91bb873f 5993 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2a2ab3f9 5994 {
2a2ab3f9 5995 output_pic_addr_const (file, XEXP (x, 1), code);
e075ae69 5996 putc ('+', file);
e9a25f70 5997 output_pic_addr_const (file, XEXP (x, 0), code);
2a2ab3f9 5998 }
91bb873f
RH
5999 else
6000 abort ();
2a2ab3f9
JVA
6001 break;
6002
6003 case MINUS:
b069de3b
SS
6004 if (!TARGET_MACHO)
6005 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
2a2ab3f9 6006 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 6007 putc ('-', file);
2a2ab3f9 6008 output_pic_addr_const (file, XEXP (x, 1), code);
b069de3b
SS
6009 if (!TARGET_MACHO)
6010 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
2a2ab3f9
JVA
6011 break;
6012
91bb873f
RH
6013 case UNSPEC:
6014 if (XVECLEN (x, 0) != 1)
5bf0ebab 6015 abort ();
91bb873f
RH
6016 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6017 switch (XINT (x, 1))
77ebd435 6018 {
8ee41eaf 6019 case UNSPEC_GOT:
77ebd435
AJ
6020 fputs ("@GOT", file);
6021 break;
8ee41eaf 6022 case UNSPEC_GOTOFF:
77ebd435
AJ
6023 fputs ("@GOTOFF", file);
6024 break;
8ee41eaf 6025 case UNSPEC_GOTPCREL:
edfe8595 6026 fputs ("@GOTPCREL(%rip)", file);
6eb791fc 6027 break;
f996902d 6028 case UNSPEC_GOTTPOFF:
dea73790 6029 /* FIXME: This might be @TPOFF in Sun ld too. */
f996902d
RH
6030 fputs ("@GOTTPOFF", file);
6031 break;
6032 case UNSPEC_TPOFF:
6033 fputs ("@TPOFF", file);
6034 break;
6035 case UNSPEC_NTPOFF:
6036 fputs ("@NTPOFF", file);
6037 break;
6038 case UNSPEC_DTPOFF:
6039 fputs ("@DTPOFF", file);
6040 break;
dea73790
JJ
6041 case UNSPEC_GOTNTPOFF:
6042 fputs ("@GOTNTPOFF", file);
6043 break;
6044 case UNSPEC_INDNTPOFF:
6045 fputs ("@INDNTPOFF", file);
6046 break;
77ebd435
AJ
6047 default:
6048 output_operand_lossage ("invalid UNSPEC as operand");
6049 break;
6050 }
91bb873f
RH
6051 break;
6052
2a2ab3f9
JVA
6053 default:
6054 output_operand_lossage ("invalid expression as operand");
6055 }
6056}
1865dbb5 6057
0f290768 6058/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
1865dbb5
JM
6059 We need to handle our special PIC relocations. */
6060
0f290768 6061void
1865dbb5
JM
6062i386_dwarf_output_addr_const (file, x)
6063 FILE *file;
6064 rtx x;
6065{
14f73b5a 6066#ifdef ASM_QUAD
18b5b8d6 6067 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
14f73b5a
JH
6068#else
6069 if (TARGET_64BIT)
6070 abort ();
18b5b8d6 6071 fprintf (file, "%s", ASM_LONG);
14f73b5a 6072#endif
1865dbb5
JM
6073 if (flag_pic)
6074 output_pic_addr_const (file, x, '\0');
6075 else
6076 output_addr_const (file, x);
6077 fputc ('\n', file);
6078}
6079
b9203463
RH
6080/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6081 We need to emit DTP-relative relocations. */
6082
6083void
6084i386_output_dwarf_dtprel (file, size, x)
6085 FILE *file;
6086 int size;
6087 rtx x;
6088{
6089 switch (size)
6090 {
6091 case 4:
6092 fputs (ASM_LONG, file);
6093 break;
6094 case 8:
6095#ifdef ASM_QUAD
6096 fputs (ASM_QUAD, file);
6097 break;
6098#endif
6099 default:
6100 abort ();
6101 }
6102
6103 output_addr_const (file, x);
6104 fputs ("@DTPOFF", file);
6105}
6106
1865dbb5
JM
6107/* In the name of slightly smaller debug output, and to cater to
6108 general assembler losage, recognize PIC+GOTOFF and turn it back
6109 into a direct symbol reference. */
6110
6111rtx
6112i386_simplify_dwarf_addr (orig_x)
6113 rtx orig_x;
6114{
ec65b2e3 6115 rtx x = orig_x, y;
1865dbb5 6116
4c8c0dec
JJ
6117 if (GET_CODE (x) == MEM)
6118 x = XEXP (x, 0);
6119
6eb791fc
JH
6120 if (TARGET_64BIT)
6121 {
6122 if (GET_CODE (x) != CONST
6123 || GET_CODE (XEXP (x, 0)) != UNSPEC
8ee41eaf 6124 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
4c8c0dec 6125 || GET_CODE (orig_x) != MEM)
6eb791fc
JH
6126 return orig_x;
6127 return XVECEXP (XEXP (x, 0), 0, 0);
6128 }
6129
1865dbb5 6130 if (GET_CODE (x) != PLUS
1865dbb5
JM
6131 || GET_CODE (XEXP (x, 1)) != CONST)
6132 return orig_x;
6133
ec65b2e3
JJ
6134 if (GET_CODE (XEXP (x, 0)) == REG
6135 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6136 /* %ebx + GOT/GOTOFF */
6137 y = NULL;
6138 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6139 {
6140 /* %ebx + %reg * scale + GOT/GOTOFF */
6141 y = XEXP (x, 0);
6142 if (GET_CODE (XEXP (y, 0)) == REG
6143 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6144 y = XEXP (y, 1);
6145 else if (GET_CODE (XEXP (y, 1)) == REG
6146 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6147 y = XEXP (y, 0);
6148 else
6149 return orig_x;
6150 if (GET_CODE (y) != REG
6151 && GET_CODE (y) != MULT
6152 && GET_CODE (y) != ASHIFT)
6153 return orig_x;
6154 }
6155 else
6156 return orig_x;
6157
1865dbb5
JM
6158 x = XEXP (XEXP (x, 1), 0);
6159 if (GET_CODE (x) == UNSPEC
8ee41eaf
RH
6160 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6161 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
ec65b2e3
JJ
6162 {
6163 if (y)
6164 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6165 return XVECEXP (x, 0, 0);
6166 }
1865dbb5
JM
6167
6168 if (GET_CODE (x) == PLUS
6169 && GET_CODE (XEXP (x, 0)) == UNSPEC
6170 && GET_CODE (XEXP (x, 1)) == CONST_INT
8ee41eaf
RH
6171 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6172 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6173 && GET_CODE (orig_x) != MEM)))
ec65b2e3
JJ
6174 {
6175 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6176 if (y)
6177 return gen_rtx_PLUS (Pmode, y, x);
6178 return x;
6179 }
1865dbb5
JM
6180
6181 return orig_x;
6182}
2a2ab3f9 6183\f
a269a03c 6184static void
e075ae69 6185put_condition_code (code, mode, reverse, fp, file)
a269a03c 6186 enum rtx_code code;
e075ae69
RH
6187 enum machine_mode mode;
6188 int reverse, fp;
a269a03c
JC
6189 FILE *file;
6190{
a269a03c
JC
6191 const char *suffix;
6192
9a915772
JH
6193 if (mode == CCFPmode || mode == CCFPUmode)
6194 {
6195 enum rtx_code second_code, bypass_code;
6196 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6197 if (bypass_code != NIL || second_code != NIL)
b531087a 6198 abort ();
9a915772
JH
6199 code = ix86_fp_compare_code_to_integer (code);
6200 mode = CCmode;
6201 }
a269a03c
JC
6202 if (reverse)
6203 code = reverse_condition (code);
e075ae69 6204
a269a03c
JC
6205 switch (code)
6206 {
6207 case EQ:
6208 suffix = "e";
6209 break;
a269a03c
JC
6210 case NE:
6211 suffix = "ne";
6212 break;
a269a03c 6213 case GT:
7e08e190 6214 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
e075ae69
RH
6215 abort ();
6216 suffix = "g";
a269a03c 6217 break;
a269a03c 6218 case GTU:
e075ae69
RH
6219 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6220 Those same assemblers have the same but opposite losage on cmov. */
7e08e190 6221 if (mode != CCmode)
0f290768 6222 abort ();
e075ae69 6223 suffix = fp ? "nbe" : "a";
a269a03c 6224 break;
a269a03c 6225 case LT:
9076b9c1 6226 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 6227 suffix = "s";
7e08e190 6228 else if (mode == CCmode || mode == CCGCmode)
e075ae69 6229 suffix = "l";
9076b9c1 6230 else
0f290768 6231 abort ();
a269a03c 6232 break;
a269a03c 6233 case LTU:
9076b9c1 6234 if (mode != CCmode)
0f290768 6235 abort ();
a269a03c
JC
6236 suffix = "b";
6237 break;
a269a03c 6238 case GE:
9076b9c1 6239 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 6240 suffix = "ns";
7e08e190 6241 else if (mode == CCmode || mode == CCGCmode)
e075ae69 6242 suffix = "ge";
9076b9c1 6243 else
0f290768 6244 abort ();
a269a03c 6245 break;
a269a03c 6246 case GEU:
e075ae69 6247 /* ??? As above. */
7e08e190 6248 if (mode != CCmode)
0f290768 6249 abort ();
7e08e190 6250 suffix = fp ? "nb" : "ae";
a269a03c 6251 break;
a269a03c 6252 case LE:
7e08e190 6253 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
e075ae69
RH
6254 abort ();
6255 suffix = "le";
a269a03c 6256 break;
a269a03c 6257 case LEU:
9076b9c1
JH
6258 if (mode != CCmode)
6259 abort ();
7e08e190 6260 suffix = "be";
a269a03c 6261 break;
3a3677ff 6262 case UNORDERED:
9e7adcb3 6263 suffix = fp ? "u" : "p";
3a3677ff
RH
6264 break;
6265 case ORDERED:
9e7adcb3 6266 suffix = fp ? "nu" : "np";
3a3677ff 6267 break;
a269a03c
JC
6268 default:
6269 abort ();
6270 }
6271 fputs (suffix, file);
6272}
6273
e075ae69
RH
6274void
6275print_reg (x, code, file)
6276 rtx x;
6277 int code;
6278 FILE *file;
e5cb57e8 6279{
e075ae69 6280 if (REGNO (x) == ARG_POINTER_REGNUM
564d80f4 6281 || REGNO (x) == FRAME_POINTER_REGNUM
e075ae69
RH
6282 || REGNO (x) == FLAGS_REG
6283 || REGNO (x) == FPSR_REG)
6284 abort ();
e9a25f70 6285
5bf0ebab 6286 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
e075ae69
RH
6287 putc ('%', file);
6288
ef6257cd 6289 if (code == 'w' || MMX_REG_P (x))
e075ae69
RH
6290 code = 2;
6291 else if (code == 'b')
6292 code = 1;
6293 else if (code == 'k')
6294 code = 4;
3f3f2124
JH
6295 else if (code == 'q')
6296 code = 8;
e075ae69
RH
6297 else if (code == 'y')
6298 code = 3;
6299 else if (code == 'h')
6300 code = 0;
6301 else
6302 code = GET_MODE_SIZE (GET_MODE (x));
e9a25f70 6303
3f3f2124
JH
6304 /* Irritatingly, AMD extended registers use different naming convention
6305 from the normal registers. */
6306 if (REX_INT_REG_P (x))
6307 {
885a70fd
JH
6308 if (!TARGET_64BIT)
6309 abort ();
3f3f2124
JH
6310 switch (code)
6311 {
ef6257cd 6312 case 0:
c725bd79 6313 error ("extended registers have no high halves");
3f3f2124
JH
6314 break;
6315 case 1:
6316 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6317 break;
6318 case 2:
6319 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6320 break;
6321 case 4:
6322 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6323 break;
6324 case 8:
6325 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6326 break;
6327 default:
c725bd79 6328 error ("unsupported operand size for extended register");
3f3f2124
JH
6329 break;
6330 }
6331 return;
6332 }
e075ae69
RH
6333 switch (code)
6334 {
6335 case 3:
6336 if (STACK_TOP_P (x))
6337 {
6338 fputs ("st(0)", file);
6339 break;
6340 }
6341 /* FALLTHRU */
e075ae69 6342 case 8:
3f3f2124 6343 case 4:
e075ae69 6344 case 12:
446988df 6345 if (! ANY_FP_REG_P (x))
885a70fd 6346 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
e075ae69 6347 /* FALLTHRU */
a7180f70 6348 case 16:
e075ae69
RH
6349 case 2:
6350 fputs (hi_reg_name[REGNO (x)], file);
6351 break;
6352 case 1:
6353 fputs (qi_reg_name[REGNO (x)], file);
6354 break;
6355 case 0:
6356 fputs (qi_high_reg_name[REGNO (x)], file);
6357 break;
6358 default:
6359 abort ();
fe25fea3 6360 }
e5cb57e8
SC
6361}
6362
f996902d
RH
6363/* Locate some local-dynamic symbol still in use by this function
6364 so that we can print its name in some tls_local_dynamic_base
6365 pattern. */
6366
6367static const char *
6368get_some_local_dynamic_name ()
6369{
6370 rtx insn;
6371
6372 if (cfun->machine->some_ld_name)
6373 return cfun->machine->some_ld_name;
6374
6375 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6376 if (INSN_P (insn)
6377 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6378 return cfun->machine->some_ld_name;
6379
6380 abort ();
6381}
6382
6383static int
6384get_some_local_dynamic_name_1 (px, data)
6385 rtx *px;
6386 void *data ATTRIBUTE_UNUSED;
6387{
6388 rtx x = *px;
6389
6390 if (GET_CODE (x) == SYMBOL_REF
6391 && local_dynamic_symbolic_operand (x, Pmode))
6392 {
6393 cfun->machine->some_ld_name = XSTR (x, 0);
6394 return 1;
6395 }
6396
6397 return 0;
6398}
6399
2a2ab3f9 6400/* Meaning of CODE:
fe25fea3 6401 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 6402 C -- print opcode suffix for set/cmov insn.
fe25fea3 6403 c -- like C, but print reversed condition
ef6257cd 6404 F,f -- likewise, but for floating-point.
048b1c95
JJ
6405 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6406 nothing
2a2ab3f9
JVA
6407 R -- print the prefix for register names.
6408 z -- print the opcode suffix for the size of the current operand.
6409 * -- print a star (in certain assembler syntax)
fb204271 6410 A -- print an absolute memory reference.
2a2ab3f9 6411 w -- print the operand as if it's a "word" (HImode) even if it isn't.
2d49677f
SC
6412 s -- print a shift double count, followed by the assemblers argument
6413 delimiter.
fe25fea3
SC
6414 b -- print the QImode name of the register for the indicated operand.
6415 %b0 would print %al if operands[0] is reg 0.
6416 w -- likewise, print the HImode name of the register.
6417 k -- likewise, print the SImode name of the register.
3f3f2124 6418 q -- likewise, print the DImode name of the register.
ef6257cd
JH
6419 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6420 y -- print "st(0)" instead of "st" as a register.
a46d1d38 6421 D -- print condition for SSE cmp instruction.
ef6257cd
JH
6422 P -- if PIC, print an @PLT suffix.
6423 X -- don't print any sort of PIC '@' suffix for a symbol.
f996902d 6424 & -- print some in-use local-dynamic symbol name.
a46d1d38 6425 */
2a2ab3f9
JVA
6426
6427void
6428print_operand (file, x, code)
6429 FILE *file;
6430 rtx x;
6431 int code;
6432{
6433 if (code)
6434 {
6435 switch (code)
6436 {
6437 case '*':
80f33d06 6438 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9
JVA
6439 putc ('*', file);
6440 return;
6441
f996902d
RH
6442 case '&':
6443 assemble_name (file, get_some_local_dynamic_name ());
6444 return;
6445
fb204271 6446 case 'A':
80f33d06 6447 if (ASSEMBLER_DIALECT == ASM_ATT)
fb204271 6448 putc ('*', file);
80f33d06 6449 else if (ASSEMBLER_DIALECT == ASM_INTEL)
fb204271
DN
6450 {
6451 /* Intel syntax. For absolute addresses, registers should not
6452 be surrounded by braces. */
6453 if (GET_CODE (x) != REG)
6454 {
6455 putc ('[', file);
6456 PRINT_OPERAND (file, x, 0);
6457 putc (']', file);
6458 return;
6459 }
6460 }
80f33d06
GS
6461 else
6462 abort ();
fb204271
DN
6463
6464 PRINT_OPERAND (file, x, 0);
6465 return;
6466
6467
2a2ab3f9 6468 case 'L':
80f33d06 6469 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6470 putc ('l', file);
2a2ab3f9
JVA
6471 return;
6472
6473 case 'W':
80f33d06 6474 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6475 putc ('w', file);
2a2ab3f9
JVA
6476 return;
6477
6478 case 'B':
80f33d06 6479 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6480 putc ('b', file);
2a2ab3f9
JVA
6481 return;
6482
6483 case 'Q':
80f33d06 6484 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6485 putc ('l', file);
2a2ab3f9
JVA
6486 return;
6487
6488 case 'S':
80f33d06 6489 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6490 putc ('s', file);
2a2ab3f9
JVA
6491 return;
6492
5f1ec3e6 6493 case 'T':
80f33d06 6494 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6495 putc ('t', file);
5f1ec3e6
JVA
6496 return;
6497
2a2ab3f9
JVA
6498 case 'z':
6499 /* 387 opcodes don't get size suffixes if the operands are
0f290768 6500 registers. */
2a2ab3f9
JVA
6501 if (STACK_REG_P (x))
6502 return;
6503
831c4e87
KC
6504 /* Likewise if using Intel opcodes. */
6505 if (ASSEMBLER_DIALECT == ASM_INTEL)
6506 return;
6507
6508 /* This is the size of op from size of operand. */
2a2ab3f9
JVA
6509 switch (GET_MODE_SIZE (GET_MODE (x)))
6510 {
2a2ab3f9 6511 case 2:
155d8a47
JW
6512#ifdef HAVE_GAS_FILDS_FISTS
6513 putc ('s', file);
6514#endif
2a2ab3f9
JVA
6515 return;
6516
6517 case 4:
6518 if (GET_MODE (x) == SFmode)
6519 {
e075ae69 6520 putc ('s', file);
2a2ab3f9
JVA
6521 return;
6522 }
6523 else
e075ae69 6524 putc ('l', file);
2a2ab3f9
JVA
6525 return;
6526
5f1ec3e6 6527 case 12:
2b589241 6528 case 16:
e075ae69
RH
6529 putc ('t', file);
6530 return;
5f1ec3e6 6531
2a2ab3f9
JVA
6532 case 8:
6533 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
56c0e8fa
JVA
6534 {
6535#ifdef GAS_MNEMONICS
e075ae69 6536 putc ('q', file);
56c0e8fa 6537#else
e075ae69
RH
6538 putc ('l', file);
6539 putc ('l', file);
56c0e8fa
JVA
6540#endif
6541 }
e075ae69
RH
6542 else
6543 putc ('l', file);
2a2ab3f9 6544 return;
155d8a47
JW
6545
6546 default:
6547 abort ();
2a2ab3f9 6548 }
4af3895e
JVA
6549
6550 case 'b':
6551 case 'w':
6552 case 'k':
3f3f2124 6553 case 'q':
4af3895e
JVA
6554 case 'h':
6555 case 'y':
5cb6195d 6556 case 'X':
e075ae69 6557 case 'P':
4af3895e
JVA
6558 break;
6559
2d49677f
SC
6560 case 's':
6561 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6562 {
6563 PRINT_OPERAND (file, x, 0);
e075ae69 6564 putc (',', file);
2d49677f 6565 }
a269a03c
JC
6566 return;
6567
a46d1d38
JH
6568 case 'D':
6569 /* Little bit of braindamage here. The SSE compare instructions
6570 does use completely different names for the comparisons that the
6571 fp conditional moves. */
6572 switch (GET_CODE (x))
6573 {
6574 case EQ:
6575 case UNEQ:
6576 fputs ("eq", file);
6577 break;
6578 case LT:
6579 case UNLT:
6580 fputs ("lt", file);
6581 break;
6582 case LE:
6583 case UNLE:
6584 fputs ("le", file);
6585 break;
6586 case UNORDERED:
6587 fputs ("unord", file);
6588 break;
6589 case NE:
6590 case LTGT:
6591 fputs ("neq", file);
6592 break;
6593 case UNGE:
6594 case GE:
6595 fputs ("nlt", file);
6596 break;
6597 case UNGT:
6598 case GT:
6599 fputs ("nle", file);
6600 break;
6601 case ORDERED:
6602 fputs ("ord", file);
6603 break;
6604 default:
6605 abort ();
6606 break;
6607 }
6608 return;
048b1c95
JJ
6609 case 'O':
6610#ifdef CMOV_SUN_AS_SYNTAX
6611 if (ASSEMBLER_DIALECT == ASM_ATT)
6612 {
6613 switch (GET_MODE (x))
6614 {
6615 case HImode: putc ('w', file); break;
6616 case SImode:
6617 case SFmode: putc ('l', file); break;
6618 case DImode:
6619 case DFmode: putc ('q', file); break;
6620 default: abort ();
6621 }
6622 putc ('.', file);
6623 }
6624#endif
6625 return;
1853aadd 6626 case 'C':
e075ae69 6627 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 6628 return;
fe25fea3 6629 case 'F':
048b1c95
JJ
6630#ifdef CMOV_SUN_AS_SYNTAX
6631 if (ASSEMBLER_DIALECT == ASM_ATT)
6632 putc ('.', file);
6633#endif
e075ae69 6634 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
fe25fea3
SC
6635 return;
6636
e9a25f70 6637 /* Like above, but reverse condition */
e075ae69 6638 case 'c':
fce5a9f2 6639 /* Check to see if argument to %c is really a constant
c1d5afc4
CR
6640 and not a condition code which needs to be reversed. */
6641 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6642 {
6643 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6644 return;
6645 }
e075ae69
RH
6646 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6647 return;
fe25fea3 6648 case 'f':
048b1c95
JJ
6649#ifdef CMOV_SUN_AS_SYNTAX
6650 if (ASSEMBLER_DIALECT == ASM_ATT)
6651 putc ('.', file);
6652#endif
e075ae69 6653 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 6654 return;
ef6257cd
JH
6655 case '+':
6656 {
6657 rtx x;
e5cb57e8 6658
ef6257cd
JH
6659 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6660 return;
a4f31c00 6661
ef6257cd
JH
6662 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6663 if (x)
6664 {
6665 int pred_val = INTVAL (XEXP (x, 0));
6666
6667 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6668 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6669 {
6670 int taken = pred_val > REG_BR_PROB_BASE / 2;
6671 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6672
6673 /* Emit hints only in the case default branch prediction
6674 heruistics would fail. */
6675 if (taken != cputaken)
6676 {
6677 /* We use 3e (DS) prefix for taken branches and
6678 2e (CS) prefix for not taken branches. */
6679 if (taken)
6680 fputs ("ds ; ", file);
6681 else
6682 fputs ("cs ; ", file);
6683 }
6684 }
6685 }
6686 return;
6687 }
4af3895e 6688 default:
a52453cc 6689 output_operand_lossage ("invalid operand code `%c'", code);
2a2ab3f9
JVA
6690 }
6691 }
e9a25f70 6692
2a2ab3f9
JVA
6693 if (GET_CODE (x) == REG)
6694 {
6695 PRINT_REG (x, code, file);
6696 }
e9a25f70 6697
2a2ab3f9
JVA
6698 else if (GET_CODE (x) == MEM)
6699 {
e075ae69 6700 /* No `byte ptr' prefix for call instructions. */
80f33d06 6701 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
2a2ab3f9 6702 {
69ddee61 6703 const char * size;
e075ae69
RH
6704 switch (GET_MODE_SIZE (GET_MODE (x)))
6705 {
6706 case 1: size = "BYTE"; break;
6707 case 2: size = "WORD"; break;
6708 case 4: size = "DWORD"; break;
6709 case 8: size = "QWORD"; break;
6710 case 12: size = "XWORD"; break;
a7180f70 6711 case 16: size = "XMMWORD"; break;
e075ae69 6712 default:
564d80f4 6713 abort ();
e075ae69 6714 }
fb204271
DN
6715
6716 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6717 if (code == 'b')
6718 size = "BYTE";
6719 else if (code == 'w')
6720 size = "WORD";
6721 else if (code == 'k')
6722 size = "DWORD";
6723
e075ae69
RH
6724 fputs (size, file);
6725 fputs (" PTR ", file);
2a2ab3f9 6726 }
e075ae69
RH
6727
6728 x = XEXP (x, 0);
6729 if (flag_pic && CONSTANT_ADDRESS_P (x))
6730 output_pic_addr_const (file, x, code);
0d7d98ee 6731 /* Avoid (%rip) for call operands. */
5bf0ebab 6732 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
0d7d98ee
JH
6733 && GET_CODE (x) != CONST_INT)
6734 output_addr_const (file, x);
c8b94768
RH
6735 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6736 output_operand_lossage ("invalid constraints for operand");
2a2ab3f9 6737 else
e075ae69 6738 output_address (x);
2a2ab3f9 6739 }
e9a25f70 6740
2a2ab3f9
JVA
6741 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6742 {
e9a25f70
JL
6743 REAL_VALUE_TYPE r;
6744 long l;
6745
5f1ec3e6
JVA
6746 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6747 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69 6748
80f33d06 6749 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6750 putc ('$', file);
52267fcb 6751 fprintf (file, "0x%lx", l);
5f1ec3e6 6752 }
e9a25f70 6753
0f290768 6754 /* These float cases don't actually occur as immediate operands. */
5f1ec3e6
JVA
6755 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6756 {
e9a25f70
JL
6757 char dstr[30];
6758
da6eec72 6759 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 6760 fprintf (file, "%s", dstr);
2a2ab3f9 6761 }
e9a25f70 6762
2b589241
JH
6763 else if (GET_CODE (x) == CONST_DOUBLE
6764 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
2a2ab3f9 6765 {
e9a25f70
JL
6766 char dstr[30];
6767
da6eec72 6768 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 6769 fprintf (file, "%s", dstr);
2a2ab3f9 6770 }
f996902d 6771
79325812 6772 else
2a2ab3f9 6773 {
4af3895e 6774 if (code != 'P')
2a2ab3f9 6775 {
695dac07 6776 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
e075ae69 6777 {
80f33d06 6778 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
6779 putc ('$', file);
6780 }
2a2ab3f9
JVA
6781 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6782 || GET_CODE (x) == LABEL_REF)
e075ae69 6783 {
80f33d06 6784 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
6785 putc ('$', file);
6786 else
6787 fputs ("OFFSET FLAT:", file);
6788 }
2a2ab3f9 6789 }
e075ae69
RH
6790 if (GET_CODE (x) == CONST_INT)
6791 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6792 else if (flag_pic)
2a2ab3f9
JVA
6793 output_pic_addr_const (file, x, code);
6794 else
6795 output_addr_const (file, x);
6796 }
6797}
6798\f
6799/* Print a memory operand whose address is ADDR. */
6800
6801void
6802print_operand_address (file, addr)
6803 FILE *file;
6804 register rtx addr;
6805{
e075ae69
RH
6806 struct ix86_address parts;
6807 rtx base, index, disp;
6808 int scale;
e9a25f70 6809
9e20be0c
JJ
6810 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
6811 {
6812 if (ASSEMBLER_DIALECT == ASM_INTEL)
6813 fputs ("DWORD PTR ", file);
6814 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6815 putc ('%', file);
6816 fputs ("gs:0", file);
6817 return;
6818 }
6819
e075ae69
RH
6820 if (! ix86_decompose_address (addr, &parts))
6821 abort ();
e9a25f70 6822
e075ae69
RH
6823 base = parts.base;
6824 index = parts.index;
6825 disp = parts.disp;
6826 scale = parts.scale;
e9a25f70 6827
e075ae69
RH
6828 if (!base && !index)
6829 {
6830 /* Displacement only requires special attention. */
e9a25f70 6831
e075ae69 6832 if (GET_CODE (disp) == CONST_INT)
2a2ab3f9 6833 {
80f33d06 6834 if (ASSEMBLER_DIALECT == ASM_INTEL)
fb204271
DN
6835 {
6836 if (USER_LABEL_PREFIX[0] == 0)
6837 putc ('%', file);
6838 fputs ("ds:", file);
6839 }
e075ae69 6840 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
2a2ab3f9 6841 }
e075ae69
RH
6842 else if (flag_pic)
6843 output_pic_addr_const (file, addr, 0);
6844 else
6845 output_addr_const (file, addr);
0d7d98ee
JH
6846
6847 /* Use one byte shorter RIP relative addressing for 64bit mode. */
edfe8595
RH
6848 if (TARGET_64BIT
6849 && (GET_CODE (addr) == SYMBOL_REF
6850 || GET_CODE (addr) == LABEL_REF
6851 || (GET_CODE (addr) == CONST
6852 && GET_CODE (XEXP (addr, 0)) == PLUS
200bcf7e
JH
6853 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
6854 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
edfe8595 6855 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
0d7d98ee 6856 fputs ("(%rip)", file);
e075ae69
RH
6857 }
6858 else
6859 {
80f33d06 6860 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9 6861 {
e075ae69 6862 if (disp)
2a2ab3f9 6863 {
c399861d 6864 if (flag_pic)
e075ae69
RH
6865 output_pic_addr_const (file, disp, 0);
6866 else if (GET_CODE (disp) == LABEL_REF)
6867 output_asm_label (disp);
2a2ab3f9 6868 else
e075ae69 6869 output_addr_const (file, disp);
2a2ab3f9
JVA
6870 }
6871
e075ae69
RH
6872 putc ('(', file);
6873 if (base)
6874 PRINT_REG (base, 0, file);
6875 if (index)
2a2ab3f9 6876 {
e075ae69
RH
6877 putc (',', file);
6878 PRINT_REG (index, 0, file);
6879 if (scale != 1)
6880 fprintf (file, ",%d", scale);
2a2ab3f9 6881 }
e075ae69 6882 putc (')', file);
2a2ab3f9 6883 }
2a2ab3f9
JVA
6884 else
6885 {
e075ae69 6886 rtx offset = NULL_RTX;
e9a25f70 6887
e075ae69
RH
6888 if (disp)
6889 {
6890 /* Pull out the offset of a symbol; print any symbol itself. */
6891 if (GET_CODE (disp) == CONST
6892 && GET_CODE (XEXP (disp, 0)) == PLUS
6893 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6894 {
6895 offset = XEXP (XEXP (disp, 0), 1);
6896 disp = gen_rtx_CONST (VOIDmode,
6897 XEXP (XEXP (disp, 0), 0));
6898 }
ce193852 6899
e075ae69
RH
6900 if (flag_pic)
6901 output_pic_addr_const (file, disp, 0);
6902 else if (GET_CODE (disp) == LABEL_REF)
6903 output_asm_label (disp);
6904 else if (GET_CODE (disp) == CONST_INT)
6905 offset = disp;
6906 else
6907 output_addr_const (file, disp);
6908 }
e9a25f70 6909
e075ae69
RH
6910 putc ('[', file);
6911 if (base)
a8620236 6912 {
e075ae69
RH
6913 PRINT_REG (base, 0, file);
6914 if (offset)
6915 {
6916 if (INTVAL (offset) >= 0)
6917 putc ('+', file);
6918 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6919 }
a8620236 6920 }
e075ae69
RH
6921 else if (offset)
6922 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 6923 else
e075ae69 6924 putc ('0', file);
e9a25f70 6925
e075ae69
RH
6926 if (index)
6927 {
6928 putc ('+', file);
6929 PRINT_REG (index, 0, file);
6930 if (scale != 1)
6931 fprintf (file, "*%d", scale);
6932 }
6933 putc (']', file);
6934 }
2a2ab3f9
JVA
6935 }
6936}
f996902d
RH
6937
6938bool
6939output_addr_const_extra (file, x)
6940 FILE *file;
6941 rtx x;
6942{
6943 rtx op;
6944
6945 if (GET_CODE (x) != UNSPEC)
6946 return false;
6947
6948 op = XVECEXP (x, 0, 0);
6949 switch (XINT (x, 1))
6950 {
6951 case UNSPEC_GOTTPOFF:
6952 output_addr_const (file, op);
dea73790 6953 /* FIXME: This might be @TPOFF in Sun ld. */
f996902d
RH
6954 fputs ("@GOTTPOFF", file);
6955 break;
6956 case UNSPEC_TPOFF:
6957 output_addr_const (file, op);
6958 fputs ("@TPOFF", file);
6959 break;
6960 case UNSPEC_NTPOFF:
6961 output_addr_const (file, op);
6962 fputs ("@NTPOFF", file);
6963 break;
6964 case UNSPEC_DTPOFF:
6965 output_addr_const (file, op);
6966 fputs ("@DTPOFF", file);
6967 break;
dea73790
JJ
6968 case UNSPEC_GOTNTPOFF:
6969 output_addr_const (file, op);
6970 fputs ("@GOTNTPOFF", file);
6971 break;
6972 case UNSPEC_INDNTPOFF:
6973 output_addr_const (file, op);
6974 fputs ("@INDNTPOFF", file);
6975 break;
f996902d
RH
6976
6977 default:
6978 return false;
6979 }
6980
6981 return true;
6982}
2a2ab3f9
JVA
6983\f
6984/* Split one or more DImode RTL references into pairs of SImode
6985 references. The RTL can be REG, offsettable MEM, integer constant, or
6986 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6987 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 6988 that parallel "operands". */
2a2ab3f9
JVA
6989
6990void
6991split_di (operands, num, lo_half, hi_half)
6992 rtx operands[];
6993 int num;
6994 rtx lo_half[], hi_half[];
6995{
6996 while (num--)
6997 {
57dbca5e 6998 rtx op = operands[num];
b932f770
JH
6999
7000 /* simplify_subreg refuse to split volatile memory addresses,
7001 but we still have to handle it. */
7002 if (GET_CODE (op) == MEM)
2a2ab3f9 7003 {
f4ef873c 7004 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 7005 hi_half[num] = adjust_address (op, SImode, 4);
2a2ab3f9
JVA
7006 }
7007 else
b932f770 7008 {
38ca929b
JH
7009 lo_half[num] = simplify_gen_subreg (SImode, op,
7010 GET_MODE (op) == VOIDmode
7011 ? DImode : GET_MODE (op), 0);
7012 hi_half[num] = simplify_gen_subreg (SImode, op,
7013 GET_MODE (op) == VOIDmode
7014 ? DImode : GET_MODE (op), 4);
b932f770 7015 }
2a2ab3f9
JVA
7016 }
7017}
44cf5b6a
JH
7018/* Split one or more TImode RTL references into pairs of SImode
7019 references. The RTL can be REG, offsettable MEM, integer constant, or
7020 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7021 split and "num" is its length. lo_half and hi_half are output arrays
7022 that parallel "operands". */
7023
7024void
7025split_ti (operands, num, lo_half, hi_half)
7026 rtx operands[];
7027 int num;
7028 rtx lo_half[], hi_half[];
7029{
7030 while (num--)
7031 {
7032 rtx op = operands[num];
b932f770
JH
7033
7034 /* simplify_subreg refuse to split volatile memory addresses, but we
7035 still have to handle it. */
7036 if (GET_CODE (op) == MEM)
44cf5b6a
JH
7037 {
7038 lo_half[num] = adjust_address (op, DImode, 0);
7039 hi_half[num] = adjust_address (op, DImode, 8);
7040 }
7041 else
b932f770
JH
7042 {
7043 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7044 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7045 }
44cf5b6a
JH
7046 }
7047}
2a2ab3f9 7048\f
2a2ab3f9
JVA
7049/* Output code to perform a 387 binary operation in INSN, one of PLUS,
7050 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7051 is the expression of the binary operation. The output may either be
7052 emitted here, or returned to the caller, like all output_* functions.
7053
7054 There is no guarantee that the operands are the same mode, as they
0f290768 7055 might be within FLOAT or FLOAT_EXTEND expressions. */
2a2ab3f9 7056
e3c2afab
AM
7057#ifndef SYSV386_COMPAT
7058/* Set to 1 for compatibility with brain-damaged assemblers. No-one
7059 wants to fix the assemblers because that causes incompatibility
7060 with gcc. No-one wants to fix gcc because that causes
7061 incompatibility with assemblers... You can use the option of
7062 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7063#define SYSV386_COMPAT 1
7064#endif
7065
69ddee61 7066const char *
2a2ab3f9
JVA
7067output_387_binary_op (insn, operands)
7068 rtx insn;
7069 rtx *operands;
7070{
e3c2afab 7071 static char buf[30];
69ddee61 7072 const char *p;
1deaa899
JH
7073 const char *ssep;
7074 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
2a2ab3f9 7075
e3c2afab
AM
7076#ifdef ENABLE_CHECKING
7077 /* Even if we do not want to check the inputs, this documents input
7078 constraints. Which helps in understanding the following code. */
7079 if (STACK_REG_P (operands[0])
7080 && ((REG_P (operands[1])
7081 && REGNO (operands[0]) == REGNO (operands[1])
7082 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7083 || (REG_P (operands[2])
7084 && REGNO (operands[0]) == REGNO (operands[2])
7085 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7086 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7087 ; /* ok */
1deaa899 7088 else if (!is_sse)
e3c2afab
AM
7089 abort ();
7090#endif
7091
2a2ab3f9
JVA
7092 switch (GET_CODE (operands[3]))
7093 {
7094 case PLUS:
e075ae69
RH
7095 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7096 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7097 p = "fiadd";
7098 else
7099 p = "fadd";
1deaa899 7100 ssep = "add";
2a2ab3f9
JVA
7101 break;
7102
7103 case MINUS:
e075ae69
RH
7104 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7105 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7106 p = "fisub";
7107 else
7108 p = "fsub";
1deaa899 7109 ssep = "sub";
2a2ab3f9
JVA
7110 break;
7111
7112 case MULT:
e075ae69
RH
7113 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7114 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7115 p = "fimul";
7116 else
7117 p = "fmul";
1deaa899 7118 ssep = "mul";
2a2ab3f9
JVA
7119 break;
7120
7121 case DIV:
e075ae69
RH
7122 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7123 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7124 p = "fidiv";
7125 else
7126 p = "fdiv";
1deaa899 7127 ssep = "div";
2a2ab3f9
JVA
7128 break;
7129
7130 default:
7131 abort ();
7132 }
7133
1deaa899
JH
7134 if (is_sse)
7135 {
7136 strcpy (buf, ssep);
7137 if (GET_MODE (operands[0]) == SFmode)
7138 strcat (buf, "ss\t{%2, %0|%0, %2}");
7139 else
7140 strcat (buf, "sd\t{%2, %0|%0, %2}");
7141 return buf;
7142 }
e075ae69 7143 strcpy (buf, p);
2a2ab3f9
JVA
7144
7145 switch (GET_CODE (operands[3]))
7146 {
7147 case MULT:
7148 case PLUS:
7149 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7150 {
e3c2afab 7151 rtx temp = operands[2];
2a2ab3f9
JVA
7152 operands[2] = operands[1];
7153 operands[1] = temp;
7154 }
7155
e3c2afab
AM
7156 /* know operands[0] == operands[1]. */
7157
2a2ab3f9 7158 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
7159 {
7160 p = "%z2\t%2";
7161 break;
7162 }
2a2ab3f9
JVA
7163
7164 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
7165 {
7166 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
7167 /* How is it that we are storing to a dead operand[2]?
7168 Well, presumably operands[1] is dead too. We can't
7169 store the result to st(0) as st(0) gets popped on this
7170 instruction. Instead store to operands[2] (which I
7171 think has to be st(1)). st(1) will be popped later.
7172 gcc <= 2.8.1 didn't have this check and generated
7173 assembly code that the Unixware assembler rejected. */
7174 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 7175 else
e3c2afab 7176 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
e075ae69 7177 break;
6b28fd63 7178 }
2a2ab3f9
JVA
7179
7180 if (STACK_TOP_P (operands[0]))
e3c2afab 7181 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 7182 else
e3c2afab 7183 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
e075ae69 7184 break;
2a2ab3f9
JVA
7185
7186 case MINUS:
7187 case DIV:
7188 if (GET_CODE (operands[1]) == MEM)
e075ae69
RH
7189 {
7190 p = "r%z1\t%1";
7191 break;
7192 }
2a2ab3f9
JVA
7193
7194 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
7195 {
7196 p = "%z2\t%2";
7197 break;
7198 }
2a2ab3f9 7199
2a2ab3f9 7200 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63 7201 {
e3c2afab
AM
7202#if SYSV386_COMPAT
7203 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7204 derived assemblers, confusingly reverse the direction of
7205 the operation for fsub{r} and fdiv{r} when the
7206 destination register is not st(0). The Intel assembler
7207 doesn't have this brain damage. Read !SYSV386_COMPAT to
7208 figure out what the hardware really does. */
7209 if (STACK_TOP_P (operands[0]))
7210 p = "{p\t%0, %2|rp\t%2, %0}";
7211 else
7212 p = "{rp\t%2, %0|p\t%0, %2}";
7213#else
6b28fd63 7214 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
7215 /* As above for fmul/fadd, we can't store to st(0). */
7216 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 7217 else
e3c2afab
AM
7218 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7219#endif
e075ae69 7220 break;
6b28fd63 7221 }
2a2ab3f9
JVA
7222
7223 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 7224 {
e3c2afab 7225#if SYSV386_COMPAT
6b28fd63 7226 if (STACK_TOP_P (operands[0]))
e3c2afab 7227 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 7228 else
e3c2afab
AM
7229 p = "{p\t%1, %0|rp\t%0, %1}";
7230#else
7231 if (STACK_TOP_P (operands[0]))
7232 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7233 else
7234 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7235#endif
e075ae69 7236 break;
6b28fd63 7237 }
2a2ab3f9
JVA
7238
7239 if (STACK_TOP_P (operands[0]))
7240 {
7241 if (STACK_TOP_P (operands[1]))
e3c2afab 7242 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 7243 else
e3c2afab 7244 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 7245 break;
2a2ab3f9
JVA
7246 }
7247 else if (STACK_TOP_P (operands[1]))
e3c2afab
AM
7248 {
7249#if SYSV386_COMPAT
7250 p = "{\t%1, %0|r\t%0, %1}";
7251#else
7252 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7253#endif
7254 }
2a2ab3f9 7255 else
e3c2afab
AM
7256 {
7257#if SYSV386_COMPAT
7258 p = "{r\t%2, %0|\t%0, %2}";
7259#else
7260 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7261#endif
7262 }
e075ae69 7263 break;
2a2ab3f9
JVA
7264
7265 default:
7266 abort ();
7267 }
e075ae69
RH
7268
7269 strcat (buf, p);
7270 return buf;
2a2ab3f9 7271}
e075ae69 7272
a4f31c00 7273/* Output code to initialize control word copies used by
7a2e09f4
JH
7274 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7275 is set to control word rounding downwards. */
7276void
7277emit_i387_cw_initialization (normal, round_down)
7278 rtx normal, round_down;
7279{
7280 rtx reg = gen_reg_rtx (HImode);
7281
7282 emit_insn (gen_x86_fnstcw_1 (normal));
7283 emit_move_insn (reg, normal);
7284 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7285 && !TARGET_64BIT)
7286 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7287 else
7288 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7289 emit_move_insn (round_down, reg);
7290}
7291
2a2ab3f9 7292/* Output code for INSN to convert a float to a signed int. OPERANDS
46d21d2c 7293 are the insn operands. The output may be [HSD]Imode and the input
e075ae69 7294 operand may be [SDX]Fmode. */
2a2ab3f9 7295
69ddee61 7296const char *
2a2ab3f9
JVA
7297output_fix_trunc (insn, operands)
7298 rtx insn;
7299 rtx *operands;
7300{
7301 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
e075ae69 7302 int dimode_p = GET_MODE (operands[0]) == DImode;
2a2ab3f9 7303
e075ae69
RH
7304 /* Jump through a hoop or two for DImode, since the hardware has no
7305 non-popping instruction. We used to do this a different way, but
7306 that was somewhat fragile and broke with post-reload splitters. */
a05924f9
JH
7307 if (dimode_p && !stack_top_dies)
7308 output_asm_insn ("fld\t%y1", operands);
e075ae69 7309
7a2e09f4 7310 if (!STACK_TOP_P (operands[1]))
10195bd8
JW
7311 abort ();
7312
e075ae69 7313 if (GET_CODE (operands[0]) != MEM)
7a2e09f4 7314 abort ();
e9a25f70 7315
7a2e09f4 7316 output_asm_insn ("fldcw\t%3", operands);
e075ae69 7317 if (stack_top_dies || dimode_p)
7a2e09f4 7318 output_asm_insn ("fistp%z0\t%0", operands);
10195bd8 7319 else
7a2e09f4 7320 output_asm_insn ("fist%z0\t%0", operands);
e075ae69 7321 output_asm_insn ("fldcw\t%2", operands);
10195bd8 7322
e075ae69 7323 return "";
2a2ab3f9 7324}
cda749b1 7325
e075ae69
RH
7326/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7327 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7328 when fucom should be used. */
7329
69ddee61 7330const char *
e075ae69 7331output_fp_compare (insn, operands, eflags_p, unordered_p)
cda749b1
JW
7332 rtx insn;
7333 rtx *operands;
e075ae69 7334 int eflags_p, unordered_p;
cda749b1 7335{
e075ae69
RH
7336 int stack_top_dies;
7337 rtx cmp_op0 = operands[0];
7338 rtx cmp_op1 = operands[1];
0644b628 7339 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
e075ae69
RH
7340
7341 if (eflags_p == 2)
7342 {
7343 cmp_op0 = cmp_op1;
7344 cmp_op1 = operands[2];
7345 }
0644b628
JH
7346 if (is_sse)
7347 {
7348 if (GET_MODE (operands[0]) == SFmode)
7349 if (unordered_p)
7350 return "ucomiss\t{%1, %0|%0, %1}";
7351 else
7352 return "comiss\t{%1, %0|%0, %y}";
7353 else
7354 if (unordered_p)
7355 return "ucomisd\t{%1, %0|%0, %1}";
7356 else
7357 return "comisd\t{%1, %0|%0, %y}";
7358 }
cda749b1 7359
e075ae69 7360 if (! STACK_TOP_P (cmp_op0))
cda749b1
JW
7361 abort ();
7362
e075ae69 7363 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 7364
e075ae69
RH
7365 if (STACK_REG_P (cmp_op1)
7366 && stack_top_dies
7367 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7368 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 7369 {
e075ae69
RH
7370 /* If both the top of the 387 stack dies, and the other operand
7371 is also a stack register that dies, then this must be a
7372 `fcompp' float compare */
7373
7374 if (eflags_p == 1)
7375 {
7376 /* There is no double popping fcomi variant. Fortunately,
7377 eflags is immune from the fstp's cc clobbering. */
7378 if (unordered_p)
7379 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7380 else
7381 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7382 return "fstp\t%y0";
7383 }
7384 else
cda749b1 7385 {
e075ae69
RH
7386 if (eflags_p == 2)
7387 {
7388 if (unordered_p)
7389 return "fucompp\n\tfnstsw\t%0";
7390 else
7391 return "fcompp\n\tfnstsw\t%0";
7392 }
cda749b1
JW
7393 else
7394 {
e075ae69
RH
7395 if (unordered_p)
7396 return "fucompp";
7397 else
7398 return "fcompp";
cda749b1
JW
7399 }
7400 }
cda749b1
JW
7401 }
7402 else
7403 {
e075ae69 7404 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 7405
0f290768 7406 static const char * const alt[24] =
e075ae69
RH
7407 {
7408 "fcom%z1\t%y1",
7409 "fcomp%z1\t%y1",
7410 "fucom%z1\t%y1",
7411 "fucomp%z1\t%y1",
0f290768 7412
e075ae69
RH
7413 "ficom%z1\t%y1",
7414 "ficomp%z1\t%y1",
7415 NULL,
7416 NULL,
7417
7418 "fcomi\t{%y1, %0|%0, %y1}",
7419 "fcomip\t{%y1, %0|%0, %y1}",
7420 "fucomi\t{%y1, %0|%0, %y1}",
7421 "fucomip\t{%y1, %0|%0, %y1}",
7422
7423 NULL,
7424 NULL,
7425 NULL,
7426 NULL,
7427
7428 "fcom%z2\t%y2\n\tfnstsw\t%0",
7429 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7430 "fucom%z2\t%y2\n\tfnstsw\t%0",
7431 "fucomp%z2\t%y2\n\tfnstsw\t%0",
0f290768 7432
e075ae69
RH
7433 "ficom%z2\t%y2\n\tfnstsw\t%0",
7434 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7435 NULL,
7436 NULL
7437 };
7438
7439 int mask;
69ddee61 7440 const char *ret;
e075ae69
RH
7441
7442 mask = eflags_p << 3;
7443 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7444 mask |= unordered_p << 1;
7445 mask |= stack_top_dies;
7446
7447 if (mask >= 24)
7448 abort ();
7449 ret = alt[mask];
7450 if (ret == NULL)
7451 abort ();
cda749b1 7452
e075ae69 7453 return ret;
cda749b1
JW
7454 }
7455}
2a2ab3f9 7456
f88c65f7
RH
7457void
7458ix86_output_addr_vec_elt (file, value)
7459 FILE *file;
7460 int value;
7461{
7462 const char *directive = ASM_LONG;
7463
7464 if (TARGET_64BIT)
7465 {
7466#ifdef ASM_QUAD
7467 directive = ASM_QUAD;
7468#else
7469 abort ();
7470#endif
7471 }
7472
7473 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7474}
7475
7476void
7477ix86_output_addr_diff_elt (file, value, rel)
7478 FILE *file;
7479 int value, rel;
7480{
7481 if (TARGET_64BIT)
74411039 7482 fprintf (file, "%s%s%d-%s%d\n",
f88c65f7
RH
7483 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7484 else if (HAVE_AS_GOTOFF_IN_DATA)
7485 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
b069de3b
SS
7486#if TARGET_MACHO
7487 else if (TARGET_MACHO)
7488 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7489 machopic_function_base_name () + 1);
7490#endif
f88c65f7 7491 else
5fc0e5df
KW
7492 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7493 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
f88c65f7 7494}
32b5b1aa 7495\f
a8bac9ab
RH
7496/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7497 for the target. */
7498
7499void
7500ix86_expand_clear (dest)
7501 rtx dest;
7502{
7503 rtx tmp;
7504
7505 /* We play register width games, which are only valid after reload. */
7506 if (!reload_completed)
7507 abort ();
7508
7509 /* Avoid HImode and its attendant prefix byte. */
7510 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7511 dest = gen_rtx_REG (SImode, REGNO (dest));
7512
7513 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7514
7515 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7516 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7517 {
7518 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7519 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7520 }
7521
7522 emit_insn (tmp);
7523}
7524
f996902d
RH
7525/* X is an unchanging MEM. If it is a constant pool reference, return
7526 the constant pool rtx, else NULL. */
7527
7528static rtx
7529maybe_get_pool_constant (x)
7530 rtx x;
7531{
7532 x = XEXP (x, 0);
7533
7534 if (flag_pic)
7535 {
7536 if (GET_CODE (x) != PLUS)
7537 return NULL_RTX;
7538 if (XEXP (x, 0) != pic_offset_table_rtx)
7539 return NULL_RTX;
7540 x = XEXP (x, 1);
7541 if (GET_CODE (x) != CONST)
7542 return NULL_RTX;
7543 x = XEXP (x, 0);
7544 if (GET_CODE (x) != UNSPEC)
7545 return NULL_RTX;
7546 if (XINT (x, 1) != UNSPEC_GOTOFF)
7547 return NULL_RTX;
7548 x = XVECEXP (x, 0, 0);
7549 }
7550
7551 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7552 return get_pool_constant (x);
7553
7554 return NULL_RTX;
7555}
7556
79325812 7557void
e075ae69
RH
7558ix86_expand_move (mode, operands)
7559 enum machine_mode mode;
7560 rtx operands[];
32b5b1aa 7561{
e075ae69 7562 int strict = (reload_in_progress || reload_completed);
f996902d
RH
7563 rtx insn, op0, op1, tmp;
7564
7565 op0 = operands[0];
7566 op1 = operands[1];
7567
7568 /* ??? We have a slight problem. We need to say that tls symbols are
7569 not legitimate constants so that reload does not helpfully reload
7570 these constants from a REG_EQUIV, which we cannot handle. (Recall
7571 that general- and local-dynamic address resolution requires a
7572 function call.)
e9a25f70 7573
f996902d
RH
7574 However, if we say that tls symbols are not legitimate constants,
7575 then emit_move_insn helpfully drop them into the constant pool.
7576
7577 It is far easier to work around emit_move_insn than reload. Recognize
7578 the MEM that we would have created and extract the symbol_ref. */
7579
7580 if (mode == Pmode
7581 && GET_CODE (op1) == MEM
7582 && RTX_UNCHANGING_P (op1))
32b5b1aa 7583 {
f996902d
RH
7584 tmp = maybe_get_pool_constant (op1);
7585 /* Note that we only care about symbolic constants here, which
7586 unlike CONST_INT will always have a proper mode. */
7587 if (tmp && GET_MODE (tmp) == Pmode)
7588 op1 = tmp;
7589 }
e9a25f70 7590
f996902d
RH
7591 if (tls_symbolic_operand (op1, Pmode))
7592 {
7593 op1 = legitimize_address (op1, op1, VOIDmode);
7594 if (GET_CODE (op0) == MEM)
7595 {
7596 tmp = gen_reg_rtx (mode);
7597 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7598 op1 = tmp;
7599 }
7600 }
7601 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7602 {
b069de3b
SS
7603#if TARGET_MACHO
7604 if (MACHOPIC_PURE)
7605 {
7606 rtx temp = ((reload_in_progress
7607 || ((op0 && GET_CODE (op0) == REG)
7608 && mode == Pmode))
7609 ? op0 : gen_reg_rtx (Pmode));
7610 op1 = machopic_indirect_data_reference (op1, temp);
7611 op1 = machopic_legitimize_pic_address (op1, mode,
7612 temp == op1 ? 0 : temp);
7613 }
7614 else
7615 {
7616 if (MACHOPIC_INDIRECT)
7617 op1 = machopic_indirect_data_reference (op1, 0);
7618 }
7619 if (op0 != op1)
7620 {
7621 insn = gen_rtx_SET (VOIDmode, op0, op1);
7622 emit_insn (insn);
7623 }
7624 return;
7625#endif /* TARGET_MACHO */
f996902d
RH
7626 if (GET_CODE (op0) == MEM)
7627 op1 = force_reg (Pmode, op1);
e075ae69 7628 else
32b5b1aa 7629 {
f996902d 7630 rtx temp = op0;
e075ae69
RH
7631 if (GET_CODE (temp) != REG)
7632 temp = gen_reg_rtx (Pmode);
f996902d
RH
7633 temp = legitimize_pic_address (op1, temp);
7634 if (temp == op0)
e075ae69 7635 return;
f996902d 7636 op1 = temp;
32b5b1aa 7637 }
e075ae69
RH
7638 }
7639 else
7640 {
f996902d 7641 if (GET_CODE (op0) == MEM
44cf5b6a 7642 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
f996902d
RH
7643 || !push_operand (op0, mode))
7644 && GET_CODE (op1) == MEM)
7645 op1 = force_reg (mode, op1);
e9a25f70 7646
f996902d
RH
7647 if (push_operand (op0, mode)
7648 && ! general_no_elim_operand (op1, mode))
7649 op1 = copy_to_mode_reg (mode, op1);
2c5a510c 7650
44cf5b6a
JH
7651 /* Force large constants in 64bit compilation into register
7652 to get them CSEed. */
7653 if (TARGET_64BIT && mode == DImode
f996902d
RH
7654 && immediate_operand (op1, mode)
7655 && !x86_64_zero_extended_value (op1)
7656 && !register_operand (op0, mode)
44cf5b6a 7657 && optimize && !reload_completed && !reload_in_progress)
f996902d 7658 op1 = copy_to_mode_reg (mode, op1);
44cf5b6a 7659
e075ae69 7660 if (FLOAT_MODE_P (mode))
32b5b1aa 7661 {
d7a29404
JH
7662 /* If we are loading a floating point constant to a register,
7663 force the value to memory now, since we'll get better code
7664 out the back end. */
e075ae69
RH
7665
7666 if (strict)
7667 ;
f996902d
RH
7668 else if (GET_CODE (op1) == CONST_DOUBLE
7669 && register_operand (op0, mode))
7670 op1 = validize_mem (force_const_mem (mode, op1));
32b5b1aa 7671 }
32b5b1aa 7672 }
e9a25f70 7673
f996902d 7674 insn = gen_rtx_SET (VOIDmode, op0, op1);
e9a25f70 7675
e075ae69
RH
7676 emit_insn (insn);
7677}
e9a25f70 7678
e37af218
RH
7679void
7680ix86_expand_vector_move (mode, operands)
7681 enum machine_mode mode;
7682 rtx operands[];
7683{
7684 /* Force constants other than zero into memory. We do not know how
7685 the instructions used to build constants modify the upper 64 bits
7686 of the register, once we have that information we may be able
7687 to handle some of them more efficiently. */
7688 if ((reload_in_progress | reload_completed) == 0
7689 && register_operand (operands[0], mode)
7690 && CONSTANT_P (operands[1]))
7691 {
7692 rtx addr = gen_reg_rtx (Pmode);
7693 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
7694 operands[1] = gen_rtx_MEM (mode, addr);
7695 }
7696
7697 /* Make operand1 a register if it isn't already. */
7698 if ((reload_in_progress | reload_completed) == 0
7699 && !register_operand (operands[0], mode)
b105d6da 7700 && !register_operand (operands[1], mode))
e37af218 7701 {
59bef189 7702 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
e37af218
RH
7703 emit_move_insn (operands[0], temp);
7704 return;
7705 }
7706
7707 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
fce5a9f2 7708}
e37af218 7709
e075ae69
RH
7710/* Attempt to expand a binary operator. Make the expansion closer to the
7711 actual machine, then just general_operand, which will allow 3 separate
9d81fc27 7712 memory references (one output, two input) in a single insn. */
e9a25f70 7713
e075ae69
RH
7714void
7715ix86_expand_binary_operator (code, mode, operands)
7716 enum rtx_code code;
7717 enum machine_mode mode;
7718 rtx operands[];
7719{
7720 int matching_memory;
7721 rtx src1, src2, dst, op, clob;
7722
7723 dst = operands[0];
7724 src1 = operands[1];
7725 src2 = operands[2];
7726
7727 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7728 if (GET_RTX_CLASS (code) == 'c'
7729 && (rtx_equal_p (dst, src2)
7730 || immediate_operand (src1, mode)))
7731 {
7732 rtx temp = src1;
7733 src1 = src2;
7734 src2 = temp;
32b5b1aa 7735 }
e9a25f70 7736
e075ae69
RH
7737 /* If the destination is memory, and we do not have matching source
7738 operands, do things in registers. */
7739 matching_memory = 0;
7740 if (GET_CODE (dst) == MEM)
32b5b1aa 7741 {
e075ae69
RH
7742 if (rtx_equal_p (dst, src1))
7743 matching_memory = 1;
7744 else if (GET_RTX_CLASS (code) == 'c'
7745 && rtx_equal_p (dst, src2))
7746 matching_memory = 2;
7747 else
7748 dst = gen_reg_rtx (mode);
7749 }
0f290768 7750
e075ae69
RH
7751 /* Both source operands cannot be in memory. */
7752 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7753 {
7754 if (matching_memory != 2)
7755 src2 = force_reg (mode, src2);
7756 else
7757 src1 = force_reg (mode, src1);
32b5b1aa 7758 }
e9a25f70 7759
06a964de
JH
7760 /* If the operation is not commutable, source 1 cannot be a constant
7761 or non-matching memory. */
0f290768 7762 if ((CONSTANT_P (src1)
06a964de
JH
7763 || (!matching_memory && GET_CODE (src1) == MEM))
7764 && GET_RTX_CLASS (code) != 'c')
e075ae69 7765 src1 = force_reg (mode, src1);
0f290768 7766
e075ae69 7767 /* If optimizing, copy to regs to improve CSE */
fe577e58 7768 if (optimize && ! no_new_pseudos)
32b5b1aa 7769 {
e075ae69
RH
7770 if (GET_CODE (dst) == MEM)
7771 dst = gen_reg_rtx (mode);
7772 if (GET_CODE (src1) == MEM)
7773 src1 = force_reg (mode, src1);
7774 if (GET_CODE (src2) == MEM)
7775 src2 = force_reg (mode, src2);
32b5b1aa 7776 }
e9a25f70 7777
e075ae69
RH
7778 /* Emit the instruction. */
7779
7780 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7781 if (reload_in_progress)
7782 {
7783 /* Reload doesn't know about the flags register, and doesn't know that
7784 it doesn't want to clobber it. We can only do this with PLUS. */
7785 if (code != PLUS)
7786 abort ();
7787 emit_insn (op);
7788 }
7789 else
32b5b1aa 7790 {
e075ae69
RH
7791 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7792 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
32b5b1aa 7793 }
e9a25f70 7794
e075ae69
RH
7795 /* Fix up the destination if needed. */
7796 if (dst != operands[0])
7797 emit_move_insn (operands[0], dst);
7798}
7799
7800/* Return TRUE or FALSE depending on whether the binary operator meets the
7801 appropriate constraints. */
7802
7803int
7804ix86_binary_operator_ok (code, mode, operands)
7805 enum rtx_code code;
7806 enum machine_mode mode ATTRIBUTE_UNUSED;
7807 rtx operands[3];
7808{
7809 /* Both source operands cannot be in memory. */
7810 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7811 return 0;
7812 /* If the operation is not commutable, source 1 cannot be a constant. */
7813 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
7814 return 0;
7815 /* If the destination is memory, we must have a matching source operand. */
7816 if (GET_CODE (operands[0]) == MEM
7817 && ! (rtx_equal_p (operands[0], operands[1])
7818 || (GET_RTX_CLASS (code) == 'c'
7819 && rtx_equal_p (operands[0], operands[2]))))
7820 return 0;
06a964de 7821 /* If the operation is not commutable and the source 1 is memory, we must
d6a7951f 7822 have a matching destination. */
06a964de
JH
7823 if (GET_CODE (operands[1]) == MEM
7824 && GET_RTX_CLASS (code) != 'c'
7825 && ! rtx_equal_p (operands[0], operands[1]))
7826 return 0;
e075ae69
RH
7827 return 1;
7828}
7829
7830/* Attempt to expand a unary operator. Make the expansion closer to the
7831 actual machine, then just general_operand, which will allow 2 separate
9d81fc27 7832 memory references (one output, one input) in a single insn. */
e075ae69 7833
9d81fc27 7834void
e075ae69
RH
7835ix86_expand_unary_operator (code, mode, operands)
7836 enum rtx_code code;
7837 enum machine_mode mode;
7838 rtx operands[];
7839{
06a964de
JH
7840 int matching_memory;
7841 rtx src, dst, op, clob;
7842
7843 dst = operands[0];
7844 src = operands[1];
e075ae69 7845
06a964de
JH
7846 /* If the destination is memory, and we do not have matching source
7847 operands, do things in registers. */
7848 matching_memory = 0;
7849 if (GET_CODE (dst) == MEM)
32b5b1aa 7850 {
06a964de
JH
7851 if (rtx_equal_p (dst, src))
7852 matching_memory = 1;
e075ae69 7853 else
06a964de 7854 dst = gen_reg_rtx (mode);
32b5b1aa 7855 }
e9a25f70 7856
06a964de
JH
7857 /* When source operand is memory, destination must match. */
7858 if (!matching_memory && GET_CODE (src) == MEM)
7859 src = force_reg (mode, src);
0f290768 7860
06a964de 7861 /* If optimizing, copy to regs to improve CSE */
fe577e58 7862 if (optimize && ! no_new_pseudos)
06a964de
JH
7863 {
7864 if (GET_CODE (dst) == MEM)
7865 dst = gen_reg_rtx (mode);
7866 if (GET_CODE (src) == MEM)
7867 src = force_reg (mode, src);
7868 }
7869
7870 /* Emit the instruction. */
7871
7872 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7873 if (reload_in_progress || code == NOT)
7874 {
7875 /* Reload doesn't know about the flags register, and doesn't know that
7876 it doesn't want to clobber it. */
7877 if (code != NOT)
7878 abort ();
7879 emit_insn (op);
7880 }
7881 else
7882 {
7883 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7884 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7885 }
7886
7887 /* Fix up the destination if needed. */
7888 if (dst != operands[0])
7889 emit_move_insn (operands[0], dst);
e075ae69
RH
7890}
7891
7892/* Return TRUE or FALSE depending on whether the unary operator meets the
7893 appropriate constraints. */
7894
7895int
7896ix86_unary_operator_ok (code, mode, operands)
7897 enum rtx_code code ATTRIBUTE_UNUSED;
7898 enum machine_mode mode ATTRIBUTE_UNUSED;
7899 rtx operands[2] ATTRIBUTE_UNUSED;
7900{
06a964de
JH
7901 /* If one of operands is memory, source and destination must match. */
7902 if ((GET_CODE (operands[0]) == MEM
7903 || GET_CODE (operands[1]) == MEM)
7904 && ! rtx_equal_p (operands[0], operands[1]))
7905 return FALSE;
e075ae69
RH
7906 return TRUE;
7907}
7908
16189740
RH
7909/* Return TRUE or FALSE depending on whether the first SET in INSN
7910 has source and destination with matching CC modes, and that the
7911 CC mode is at least as constrained as REQ_MODE. */
7912
7913int
7914ix86_match_ccmode (insn, req_mode)
7915 rtx insn;
7916 enum machine_mode req_mode;
7917{
7918 rtx set;
7919 enum machine_mode set_mode;
7920
7921 set = PATTERN (insn);
7922 if (GET_CODE (set) == PARALLEL)
7923 set = XVECEXP (set, 0, 0);
7924 if (GET_CODE (set) != SET)
7925 abort ();
9076b9c1
JH
7926 if (GET_CODE (SET_SRC (set)) != COMPARE)
7927 abort ();
16189740
RH
7928
7929 set_mode = GET_MODE (SET_DEST (set));
7930 switch (set_mode)
7931 {
9076b9c1
JH
7932 case CCNOmode:
7933 if (req_mode != CCNOmode
7934 && (req_mode != CCmode
7935 || XEXP (SET_SRC (set), 1) != const0_rtx))
7936 return 0;
7937 break;
16189740 7938 case CCmode:
9076b9c1 7939 if (req_mode == CCGCmode)
16189740
RH
7940 return 0;
7941 /* FALLTHRU */
9076b9c1
JH
7942 case CCGCmode:
7943 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7944 return 0;
7945 /* FALLTHRU */
7946 case CCGOCmode:
16189740
RH
7947 if (req_mode == CCZmode)
7948 return 0;
7949 /* FALLTHRU */
7950 case CCZmode:
7951 break;
7952
7953 default:
7954 abort ();
7955 }
7956
7957 return (GET_MODE (SET_SRC (set)) == set_mode);
7958}
7959
e075ae69
RH
7960/* Generate insn patterns to do an integer compare of OPERANDS. */
7961
7962static rtx
7963ix86_expand_int_compare (code, op0, op1)
7964 enum rtx_code code;
7965 rtx op0, op1;
7966{
7967 enum machine_mode cmpmode;
7968 rtx tmp, flags;
7969
7970 cmpmode = SELECT_CC_MODE (code, op0, op1);
7971 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7972
7973 /* This is very simple, but making the interface the same as in the
7974 FP case makes the rest of the code easier. */
7975 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7976 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7977
7978 /* Return the test that should be put into the flags user, i.e.
7979 the bcc, scc, or cmov instruction. */
7980 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7981}
7982
3a3677ff
RH
7983/* Figure out whether to use ordered or unordered fp comparisons.
7984 Return the appropriate mode to use. */
e075ae69 7985
b1cdafbb 7986enum machine_mode
3a3677ff 7987ix86_fp_compare_mode (code)
8752c357 7988 enum rtx_code code ATTRIBUTE_UNUSED;
e075ae69 7989{
9e7adcb3
JH
7990 /* ??? In order to make all comparisons reversible, we do all comparisons
7991 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7992 all forms trapping and nontrapping comparisons, we can make inequality
7993 comparisons trapping again, since it results in better code when using
7994 FCOM based compares. */
7995 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
7996}
7997
9076b9c1
JH
7998enum machine_mode
7999ix86_cc_mode (code, op0, op1)
8000 enum rtx_code code;
8001 rtx op0, op1;
8002{
8003 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8004 return ix86_fp_compare_mode (code);
8005 switch (code)
8006 {
8007 /* Only zero flag is needed. */
8008 case EQ: /* ZF=0 */
8009 case NE: /* ZF!=0 */
8010 return CCZmode;
8011 /* Codes needing carry flag. */
265dab10
JH
8012 case GEU: /* CF=0 */
8013 case GTU: /* CF=0 & ZF=0 */
7e08e190
JH
8014 case LTU: /* CF=1 */
8015 case LEU: /* CF=1 | ZF=1 */
265dab10 8016 return CCmode;
9076b9c1
JH
8017 /* Codes possibly doable only with sign flag when
8018 comparing against zero. */
8019 case GE: /* SF=OF or SF=0 */
7e08e190 8020 case LT: /* SF<>OF or SF=1 */
9076b9c1
JH
8021 if (op1 == const0_rtx)
8022 return CCGOCmode;
8023 else
8024 /* For other cases Carry flag is not required. */
8025 return CCGCmode;
8026 /* Codes doable only with sign flag when comparing
8027 against zero, but we miss jump instruction for it
8028 so we need to use relational tests agains overflow
8029 that thus needs to be zero. */
8030 case GT: /* ZF=0 & SF=OF */
8031 case LE: /* ZF=1 | SF<>OF */
8032 if (op1 == const0_rtx)
8033 return CCNOmode;
8034 else
8035 return CCGCmode;
7fcd7218
JH
8036 /* strcmp pattern do (use flags) and combine may ask us for proper
8037 mode. */
8038 case USE:
8039 return CCmode;
9076b9c1 8040 default:
0f290768 8041 abort ();
9076b9c1
JH
8042 }
8043}
8044
3a3677ff
RH
8045/* Return true if we should use an FCOMI instruction for this fp comparison. */
8046
a940d8bd 8047int
3a3677ff 8048ix86_use_fcomi_compare (code)
9e7adcb3 8049 enum rtx_code code ATTRIBUTE_UNUSED;
3a3677ff 8050{
9e7adcb3
JH
8051 enum rtx_code swapped_code = swap_condition (code);
8052 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8053 || (ix86_fp_comparison_cost (swapped_code)
8054 == ix86_fp_comparison_fcomi_cost (swapped_code)));
3a3677ff
RH
8055}
8056
0f290768 8057/* Swap, force into registers, or otherwise massage the two operands
3a3677ff
RH
8058 to a fp comparison. The operands are updated in place; the new
8059 comparsion code is returned. */
8060
8061static enum rtx_code
8062ix86_prepare_fp_compare_args (code, pop0, pop1)
8063 enum rtx_code code;
8064 rtx *pop0, *pop1;
8065{
8066 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8067 rtx op0 = *pop0, op1 = *pop1;
8068 enum machine_mode op_mode = GET_MODE (op0);
0644b628 8069 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
3a3677ff 8070
e075ae69 8071 /* All of the unordered compare instructions only work on registers.
3a3677ff
RH
8072 The same is true of the XFmode compare instructions. The same is
8073 true of the fcomi compare instructions. */
8074
0644b628
JH
8075 if (!is_sse
8076 && (fpcmp_mode == CCFPUmode
8077 || op_mode == XFmode
8078 || op_mode == TFmode
8079 || ix86_use_fcomi_compare (code)))
e075ae69 8080 {
3a3677ff
RH
8081 op0 = force_reg (op_mode, op0);
8082 op1 = force_reg (op_mode, op1);
e075ae69
RH
8083 }
8084 else
8085 {
8086 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8087 things around if they appear profitable, otherwise force op0
8088 into a register. */
8089
8090 if (standard_80387_constant_p (op0) == 0
8091 || (GET_CODE (op0) == MEM
8092 && ! (standard_80387_constant_p (op1) == 0
8093 || GET_CODE (op1) == MEM)))
32b5b1aa 8094 {
e075ae69
RH
8095 rtx tmp;
8096 tmp = op0, op0 = op1, op1 = tmp;
8097 code = swap_condition (code);
8098 }
8099
8100 if (GET_CODE (op0) != REG)
3a3677ff 8101 op0 = force_reg (op_mode, op0);
e075ae69
RH
8102
8103 if (CONSTANT_P (op1))
8104 {
8105 if (standard_80387_constant_p (op1))
3a3677ff 8106 op1 = force_reg (op_mode, op1);
e075ae69 8107 else
3a3677ff 8108 op1 = validize_mem (force_const_mem (op_mode, op1));
32b5b1aa
SC
8109 }
8110 }
e9a25f70 8111
9e7adcb3
JH
8112 /* Try to rearrange the comparison to make it cheaper. */
8113 if (ix86_fp_comparison_cost (code)
8114 > ix86_fp_comparison_cost (swap_condition (code))
558740bf 8115 && (GET_CODE (op1) == REG || !no_new_pseudos))
9e7adcb3
JH
8116 {
8117 rtx tmp;
8118 tmp = op0, op0 = op1, op1 = tmp;
8119 code = swap_condition (code);
8120 if (GET_CODE (op0) != REG)
8121 op0 = force_reg (op_mode, op0);
8122 }
8123
3a3677ff
RH
8124 *pop0 = op0;
8125 *pop1 = op1;
8126 return code;
8127}
8128
c0c102a9
JH
8129/* Convert comparison codes we use to represent FP comparison to integer
8130 code that will result in proper branch. Return UNKNOWN if no such code
8131 is available. */
8132static enum rtx_code
8133ix86_fp_compare_code_to_integer (code)
8134 enum rtx_code code;
8135{
8136 switch (code)
8137 {
8138 case GT:
8139 return GTU;
8140 case GE:
8141 return GEU;
8142 case ORDERED:
8143 case UNORDERED:
8144 return code;
8145 break;
8146 case UNEQ:
8147 return EQ;
8148 break;
8149 case UNLT:
8150 return LTU;
8151 break;
8152 case UNLE:
8153 return LEU;
8154 break;
8155 case LTGT:
8156 return NE;
8157 break;
8158 default:
8159 return UNKNOWN;
8160 }
8161}
8162
8163/* Split comparison code CODE into comparisons we can do using branch
8164 instructions. BYPASS_CODE is comparison code for branch that will
8165 branch around FIRST_CODE and SECOND_CODE. If some of branches
8166 is not required, set value to NIL.
8167 We never require more than two branches. */
8168static void
8169ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8170 enum rtx_code code, *bypass_code, *first_code, *second_code;
8171{
8172 *first_code = code;
8173 *bypass_code = NIL;
8174 *second_code = NIL;
8175
8176 /* The fcomi comparison sets flags as follows:
8177
8178 cmp ZF PF CF
8179 > 0 0 0
8180 < 0 0 1
8181 = 1 0 0
8182 un 1 1 1 */
8183
8184 switch (code)
8185 {
8186 case GT: /* GTU - CF=0 & ZF=0 */
8187 case GE: /* GEU - CF=0 */
8188 case ORDERED: /* PF=0 */
8189 case UNORDERED: /* PF=1 */
8190 case UNEQ: /* EQ - ZF=1 */
8191 case UNLT: /* LTU - CF=1 */
8192 case UNLE: /* LEU - CF=1 | ZF=1 */
8193 case LTGT: /* EQ - ZF=0 */
8194 break;
8195 case LT: /* LTU - CF=1 - fails on unordered */
8196 *first_code = UNLT;
8197 *bypass_code = UNORDERED;
8198 break;
8199 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8200 *first_code = UNLE;
8201 *bypass_code = UNORDERED;
8202 break;
8203 case EQ: /* EQ - ZF=1 - fails on unordered */
8204 *first_code = UNEQ;
8205 *bypass_code = UNORDERED;
8206 break;
8207 case NE: /* NE - ZF=0 - fails on unordered */
8208 *first_code = LTGT;
8209 *second_code = UNORDERED;
8210 break;
8211 case UNGE: /* GEU - CF=0 - fails on unordered */
8212 *first_code = GE;
8213 *second_code = UNORDERED;
8214 break;
8215 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8216 *first_code = GT;
8217 *second_code = UNORDERED;
8218 break;
8219 default:
8220 abort ();
8221 }
8222 if (!TARGET_IEEE_FP)
8223 {
8224 *second_code = NIL;
8225 *bypass_code = NIL;
8226 }
8227}
8228
9e7adcb3
JH
8229/* Return cost of comparison done fcom + arithmetics operations on AX.
8230 All following functions do use number of instructions as an cost metrics.
8231 In future this should be tweaked to compute bytes for optimize_size and
8232 take into account performance of various instructions on various CPUs. */
8233static int
8234ix86_fp_comparison_arithmetics_cost (code)
8235 enum rtx_code code;
8236{
8237 if (!TARGET_IEEE_FP)
8238 return 4;
8239 /* The cost of code output by ix86_expand_fp_compare. */
8240 switch (code)
8241 {
8242 case UNLE:
8243 case UNLT:
8244 case LTGT:
8245 case GT:
8246 case GE:
8247 case UNORDERED:
8248 case ORDERED:
8249 case UNEQ:
8250 return 4;
8251 break;
8252 case LT:
8253 case NE:
8254 case EQ:
8255 case UNGE:
8256 return 5;
8257 break;
8258 case LE:
8259 case UNGT:
8260 return 6;
8261 break;
8262 default:
8263 abort ();
8264 }
8265}
8266
8267/* Return cost of comparison done using fcomi operation.
8268 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8269static int
8270ix86_fp_comparison_fcomi_cost (code)
8271 enum rtx_code code;
8272{
8273 enum rtx_code bypass_code, first_code, second_code;
8274 /* Return arbitarily high cost when instruction is not supported - this
8275 prevents gcc from using it. */
8276 if (!TARGET_CMOVE)
8277 return 1024;
8278 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8279 return (bypass_code != NIL || second_code != NIL) + 2;
8280}
8281
8282/* Return cost of comparison done using sahf operation.
8283 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8284static int
8285ix86_fp_comparison_sahf_cost (code)
8286 enum rtx_code code;
8287{
8288 enum rtx_code bypass_code, first_code, second_code;
8289 /* Return arbitarily high cost when instruction is not preferred - this
8290 avoids gcc from using it. */
8291 if (!TARGET_USE_SAHF && !optimize_size)
8292 return 1024;
8293 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8294 return (bypass_code != NIL || second_code != NIL) + 3;
8295}
8296
8297/* Compute cost of the comparison done using any method.
8298 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8299static int
8300ix86_fp_comparison_cost (code)
8301 enum rtx_code code;
8302{
8303 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8304 int min;
8305
8306 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8307 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8308
8309 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8310 if (min > sahf_cost)
8311 min = sahf_cost;
8312 if (min > fcomi_cost)
8313 min = fcomi_cost;
8314 return min;
8315}
c0c102a9 8316
3a3677ff
RH
8317/* Generate insn patterns to do a floating point compare of OPERANDS. */
8318
9e7adcb3
JH
8319static rtx
8320ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
3a3677ff
RH
8321 enum rtx_code code;
8322 rtx op0, op1, scratch;
9e7adcb3
JH
8323 rtx *second_test;
8324 rtx *bypass_test;
3a3677ff
RH
8325{
8326 enum machine_mode fpcmp_mode, intcmp_mode;
c0c102a9 8327 rtx tmp, tmp2;
9e7adcb3 8328 int cost = ix86_fp_comparison_cost (code);
c0c102a9 8329 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
8330
8331 fpcmp_mode = ix86_fp_compare_mode (code);
8332 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8333
9e7adcb3
JH
8334 if (second_test)
8335 *second_test = NULL_RTX;
8336 if (bypass_test)
8337 *bypass_test = NULL_RTX;
8338
c0c102a9
JH
8339 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8340
9e7adcb3
JH
8341 /* Do fcomi/sahf based test when profitable. */
8342 if ((bypass_code == NIL || bypass_test)
8343 && (second_code == NIL || second_test)
8344 && ix86_fp_comparison_arithmetics_cost (code) > cost)
32b5b1aa 8345 {
c0c102a9
JH
8346 if (TARGET_CMOVE)
8347 {
8348 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8349 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8350 tmp);
8351 emit_insn (tmp);
8352 }
8353 else
8354 {
8355 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8ee41eaf 8356 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
bf71a4f8
JH
8357 if (!scratch)
8358 scratch = gen_reg_rtx (HImode);
c0c102a9
JH
8359 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8360 emit_insn (gen_x86_sahf_1 (scratch));
8361 }
e075ae69
RH
8362
8363 /* The FP codes work out to act like unsigned. */
9a915772 8364 intcmp_mode = fpcmp_mode;
9e7adcb3
JH
8365 code = first_code;
8366 if (bypass_code != NIL)
8367 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8368 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8369 const0_rtx);
8370 if (second_code != NIL)
8371 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8372 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8373 const0_rtx);
e075ae69
RH
8374 }
8375 else
8376 {
8377 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
e075ae69 8378 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8ee41eaf 8379 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
bf71a4f8
JH
8380 if (!scratch)
8381 scratch = gen_reg_rtx (HImode);
3a3677ff 8382 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
e075ae69 8383
9a915772
JH
8384 /* In the unordered case, we have to check C2 for NaN's, which
8385 doesn't happen to work out to anything nice combination-wise.
8386 So do some bit twiddling on the value we've got in AH to come
8387 up with an appropriate set of condition codes. */
e075ae69 8388
9a915772
JH
8389 intcmp_mode = CCNOmode;
8390 switch (code)
32b5b1aa 8391 {
9a915772
JH
8392 case GT:
8393 case UNGT:
8394 if (code == GT || !TARGET_IEEE_FP)
32b5b1aa 8395 {
3a3677ff 8396 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
e075ae69 8397 code = EQ;
9a915772
JH
8398 }
8399 else
8400 {
8401 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8402 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8403 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8404 intcmp_mode = CCmode;
8405 code = GEU;
8406 }
8407 break;
8408 case LT:
8409 case UNLT:
8410 if (code == LT && TARGET_IEEE_FP)
8411 {
3a3677ff
RH
8412 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8413 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
e075ae69
RH
8414 intcmp_mode = CCmode;
8415 code = EQ;
9a915772
JH
8416 }
8417 else
8418 {
8419 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8420 code = NE;
8421 }
8422 break;
8423 case GE:
8424 case UNGE:
8425 if (code == GE || !TARGET_IEEE_FP)
8426 {
3a3677ff 8427 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
e075ae69 8428 code = EQ;
9a915772
JH
8429 }
8430 else
8431 {
8432 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8433 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8434 GEN_INT (0x01)));
8435 code = NE;
8436 }
8437 break;
8438 case LE:
8439 case UNLE:
8440 if (code == LE && TARGET_IEEE_FP)
8441 {
3a3677ff
RH
8442 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8443 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8444 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
8445 intcmp_mode = CCmode;
8446 code = LTU;
9a915772
JH
8447 }
8448 else
8449 {
8450 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8451 code = NE;
8452 }
8453 break;
8454 case EQ:
8455 case UNEQ:
8456 if (code == EQ && TARGET_IEEE_FP)
8457 {
3a3677ff
RH
8458 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8459 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
8460 intcmp_mode = CCmode;
8461 code = EQ;
9a915772
JH
8462 }
8463 else
8464 {
3a3677ff
RH
8465 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8466 code = NE;
8467 break;
9a915772
JH
8468 }
8469 break;
8470 case NE:
8471 case LTGT:
8472 if (code == NE && TARGET_IEEE_FP)
8473 {
3a3677ff 8474 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9a915772
JH
8475 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8476 GEN_INT (0x40)));
3a3677ff 8477 code = NE;
9a915772
JH
8478 }
8479 else
8480 {
3a3677ff
RH
8481 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8482 code = EQ;
32b5b1aa 8483 }
9a915772
JH
8484 break;
8485
8486 case UNORDERED:
8487 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8488 code = NE;
8489 break;
8490 case ORDERED:
8491 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8492 code = EQ;
8493 break;
8494
8495 default:
8496 abort ();
32b5b1aa 8497 }
32b5b1aa 8498 }
e075ae69
RH
8499
8500 /* Return the test that should be put into the flags user, i.e.
8501 the bcc, scc, or cmov instruction. */
8502 return gen_rtx_fmt_ee (code, VOIDmode,
8503 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8504 const0_rtx);
8505}
8506
9e3e266c 8507rtx
a1b8572c 8508ix86_expand_compare (code, second_test, bypass_test)
e075ae69 8509 enum rtx_code code;
a1b8572c 8510 rtx *second_test, *bypass_test;
e075ae69
RH
8511{
8512 rtx op0, op1, ret;
8513 op0 = ix86_compare_op0;
8514 op1 = ix86_compare_op1;
8515
a1b8572c
JH
8516 if (second_test)
8517 *second_test = NULL_RTX;
8518 if (bypass_test)
8519 *bypass_test = NULL_RTX;
8520
e075ae69 8521 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
bf71a4f8 8522 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
77ebd435 8523 second_test, bypass_test);
32b5b1aa 8524 else
e075ae69
RH
8525 ret = ix86_expand_int_compare (code, op0, op1);
8526
8527 return ret;
8528}
8529
03598dea
JH
8530/* Return true if the CODE will result in nontrivial jump sequence. */
8531bool
8532ix86_fp_jump_nontrivial_p (code)
8533 enum rtx_code code;
8534{
8535 enum rtx_code bypass_code, first_code, second_code;
8536 if (!TARGET_CMOVE)
8537 return true;
8538 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8539 return bypass_code != NIL || second_code != NIL;
8540}
8541
e075ae69 8542void
3a3677ff 8543ix86_expand_branch (code, label)
e075ae69 8544 enum rtx_code code;
e075ae69
RH
8545 rtx label;
8546{
3a3677ff 8547 rtx tmp;
e075ae69 8548
3a3677ff 8549 switch (GET_MODE (ix86_compare_op0))
32b5b1aa 8550 {
3a3677ff
RH
8551 case QImode:
8552 case HImode:
8553 case SImode:
0d7d98ee 8554 simple:
a1b8572c 8555 tmp = ix86_expand_compare (code, NULL, NULL);
e075ae69
RH
8556 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8557 gen_rtx_LABEL_REF (VOIDmode, label),
8558 pc_rtx);
8559 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32b5b1aa 8560 return;
e075ae69 8561
3a3677ff
RH
8562 case SFmode:
8563 case DFmode:
0f290768 8564 case XFmode:
2b589241 8565 case TFmode:
3a3677ff
RH
8566 {
8567 rtvec vec;
8568 int use_fcomi;
03598dea 8569 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
8570
8571 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8572 &ix86_compare_op1);
fce5a9f2 8573
03598dea
JH
8574 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8575
8576 /* Check whether we will use the natural sequence with one jump. If
8577 so, we can expand jump early. Otherwise delay expansion by
8578 creating compound insn to not confuse optimizers. */
8579 if (bypass_code == NIL && second_code == NIL
8580 && TARGET_CMOVE)
8581 {
8582 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8583 gen_rtx_LABEL_REF (VOIDmode, label),
8584 pc_rtx, NULL_RTX);
8585 }
8586 else
8587 {
8588 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8589 ix86_compare_op0, ix86_compare_op1);
8590 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8591 gen_rtx_LABEL_REF (VOIDmode, label),
8592 pc_rtx);
8593 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8594
8595 use_fcomi = ix86_use_fcomi_compare (code);
8596 vec = rtvec_alloc (3 + !use_fcomi);
8597 RTVEC_ELT (vec, 0) = tmp;
8598 RTVEC_ELT (vec, 1)
8599 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8600 RTVEC_ELT (vec, 2)
8601 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8602 if (! use_fcomi)
8603 RTVEC_ELT (vec, 3)
8604 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8605
8606 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8607 }
3a3677ff
RH
8608 return;
8609 }
32b5b1aa 8610
3a3677ff 8611 case DImode:
0d7d98ee
JH
8612 if (TARGET_64BIT)
8613 goto simple;
3a3677ff
RH
8614 /* Expand DImode branch into multiple compare+branch. */
8615 {
8616 rtx lo[2], hi[2], label2;
8617 enum rtx_code code1, code2, code3;
32b5b1aa 8618
3a3677ff
RH
8619 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8620 {
8621 tmp = ix86_compare_op0;
8622 ix86_compare_op0 = ix86_compare_op1;
8623 ix86_compare_op1 = tmp;
8624 code = swap_condition (code);
8625 }
8626 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8627 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
32b5b1aa 8628
3a3677ff
RH
8629 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8630 avoid two branches. This costs one extra insn, so disable when
8631 optimizing for size. */
32b5b1aa 8632
3a3677ff
RH
8633 if ((code == EQ || code == NE)
8634 && (!optimize_size
8635 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8636 {
8637 rtx xor0, xor1;
32b5b1aa 8638
3a3677ff
RH
8639 xor1 = hi[0];
8640 if (hi[1] != const0_rtx)
8641 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8642 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 8643
3a3677ff
RH
8644 xor0 = lo[0];
8645 if (lo[1] != const0_rtx)
8646 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8647 NULL_RTX, 0, OPTAB_WIDEN);
e075ae69 8648
3a3677ff
RH
8649 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8650 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 8651
3a3677ff
RH
8652 ix86_compare_op0 = tmp;
8653 ix86_compare_op1 = const0_rtx;
8654 ix86_expand_branch (code, label);
8655 return;
8656 }
e075ae69 8657
1f9124e4
JJ
8658 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8659 op1 is a constant and the low word is zero, then we can just
8660 examine the high word. */
32b5b1aa 8661
1f9124e4
JJ
8662 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8663 switch (code)
8664 {
8665 case LT: case LTU: case GE: case GEU:
8666 ix86_compare_op0 = hi[0];
8667 ix86_compare_op1 = hi[1];
8668 ix86_expand_branch (code, label);
8669 return;
8670 default:
8671 break;
8672 }
e075ae69 8673
3a3677ff 8674 /* Otherwise, we need two or three jumps. */
e075ae69 8675
3a3677ff 8676 label2 = gen_label_rtx ();
e075ae69 8677
3a3677ff
RH
8678 code1 = code;
8679 code2 = swap_condition (code);
8680 code3 = unsigned_condition (code);
e075ae69 8681
3a3677ff
RH
8682 switch (code)
8683 {
8684 case LT: case GT: case LTU: case GTU:
8685 break;
e075ae69 8686
3a3677ff
RH
8687 case LE: code1 = LT; code2 = GT; break;
8688 case GE: code1 = GT; code2 = LT; break;
8689 case LEU: code1 = LTU; code2 = GTU; break;
8690 case GEU: code1 = GTU; code2 = LTU; break;
e075ae69 8691
3a3677ff
RH
8692 case EQ: code1 = NIL; code2 = NE; break;
8693 case NE: code2 = NIL; break;
e075ae69 8694
3a3677ff
RH
8695 default:
8696 abort ();
8697 }
e075ae69 8698
3a3677ff
RH
8699 /*
8700 * a < b =>
8701 * if (hi(a) < hi(b)) goto true;
8702 * if (hi(a) > hi(b)) goto false;
8703 * if (lo(a) < lo(b)) goto true;
8704 * false:
8705 */
8706
8707 ix86_compare_op0 = hi[0];
8708 ix86_compare_op1 = hi[1];
8709
8710 if (code1 != NIL)
8711 ix86_expand_branch (code1, label);
8712 if (code2 != NIL)
8713 ix86_expand_branch (code2, label2);
8714
8715 ix86_compare_op0 = lo[0];
8716 ix86_compare_op1 = lo[1];
8717 ix86_expand_branch (code3, label);
8718
8719 if (code2 != NIL)
8720 emit_label (label2);
8721 return;
8722 }
e075ae69 8723
3a3677ff
RH
8724 default:
8725 abort ();
8726 }
32b5b1aa 8727}
e075ae69 8728
9e7adcb3
JH
8729/* Split branch based on floating point condition. */
8730void
03598dea
JH
8731ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
8732 enum rtx_code code;
8733 rtx op1, op2, target1, target2, tmp;
9e7adcb3
JH
8734{
8735 rtx second, bypass;
8736 rtx label = NULL_RTX;
03598dea 8737 rtx condition;
6b24c259
JH
8738 int bypass_probability = -1, second_probability = -1, probability = -1;
8739 rtx i;
9e7adcb3
JH
8740
8741 if (target2 != pc_rtx)
8742 {
8743 rtx tmp = target2;
8744 code = reverse_condition_maybe_unordered (code);
8745 target2 = target1;
8746 target1 = tmp;
8747 }
8748
8749 condition = ix86_expand_fp_compare (code, op1, op2,
8750 tmp, &second, &bypass);
6b24c259
JH
8751
8752 if (split_branch_probability >= 0)
8753 {
8754 /* Distribute the probabilities across the jumps.
8755 Assume the BYPASS and SECOND to be always test
8756 for UNORDERED. */
8757 probability = split_branch_probability;
8758
d6a7951f 8759 /* Value of 1 is low enough to make no need for probability
6b24c259
JH
8760 to be updated. Later we may run some experiments and see
8761 if unordered values are more frequent in practice. */
8762 if (bypass)
8763 bypass_probability = 1;
8764 if (second)
8765 second_probability = 1;
8766 }
9e7adcb3
JH
8767 if (bypass != NULL_RTX)
8768 {
8769 label = gen_label_rtx ();
6b24c259
JH
8770 i = emit_jump_insn (gen_rtx_SET
8771 (VOIDmode, pc_rtx,
8772 gen_rtx_IF_THEN_ELSE (VOIDmode,
8773 bypass,
8774 gen_rtx_LABEL_REF (VOIDmode,
8775 label),
8776 pc_rtx)));
8777 if (bypass_probability >= 0)
8778 REG_NOTES (i)
8779 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8780 GEN_INT (bypass_probability),
8781 REG_NOTES (i));
8782 }
8783 i = emit_jump_insn (gen_rtx_SET
9e7adcb3
JH
8784 (VOIDmode, pc_rtx,
8785 gen_rtx_IF_THEN_ELSE (VOIDmode,
6b24c259
JH
8786 condition, target1, target2)));
8787 if (probability >= 0)
8788 REG_NOTES (i)
8789 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8790 GEN_INT (probability),
8791 REG_NOTES (i));
8792 if (second != NULL_RTX)
9e7adcb3 8793 {
6b24c259
JH
8794 i = emit_jump_insn (gen_rtx_SET
8795 (VOIDmode, pc_rtx,
8796 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8797 target2)));
8798 if (second_probability >= 0)
8799 REG_NOTES (i)
8800 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8801 GEN_INT (second_probability),
8802 REG_NOTES (i));
9e7adcb3 8803 }
9e7adcb3
JH
8804 if (label != NULL_RTX)
8805 emit_label (label);
8806}
8807
32b5b1aa 8808int
3a3677ff 8809ix86_expand_setcc (code, dest)
e075ae69 8810 enum rtx_code code;
e075ae69 8811 rtx dest;
32b5b1aa 8812{
a1b8572c
JH
8813 rtx ret, tmp, tmpreg;
8814 rtx second_test, bypass_test;
e075ae69 8815
885a70fd
JH
8816 if (GET_MODE (ix86_compare_op0) == DImode
8817 && !TARGET_64BIT)
e075ae69
RH
8818 return 0; /* FAIL */
8819
b932f770
JH
8820 if (GET_MODE (dest) != QImode)
8821 abort ();
e075ae69 8822
a1b8572c 8823 ret = ix86_expand_compare (code, &second_test, &bypass_test);
e075ae69
RH
8824 PUT_MODE (ret, QImode);
8825
8826 tmp = dest;
a1b8572c 8827 tmpreg = dest;
32b5b1aa 8828
e075ae69 8829 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
a1b8572c
JH
8830 if (bypass_test || second_test)
8831 {
8832 rtx test = second_test;
8833 int bypass = 0;
8834 rtx tmp2 = gen_reg_rtx (QImode);
8835 if (bypass_test)
8836 {
8837 if (second_test)
b531087a 8838 abort ();
a1b8572c
JH
8839 test = bypass_test;
8840 bypass = 1;
8841 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8842 }
8843 PUT_MODE (test, QImode);
8844 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8845
8846 if (bypass)
8847 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8848 else
8849 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8850 }
e075ae69 8851
e075ae69 8852 return 1; /* DONE */
32b5b1aa 8853}
e075ae69 8854
32b5b1aa 8855int
e075ae69
RH
8856ix86_expand_int_movcc (operands)
8857 rtx operands[];
32b5b1aa 8858{
e075ae69
RH
8859 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8860 rtx compare_seq, compare_op;
a1b8572c 8861 rtx second_test, bypass_test;
635559ab 8862 enum machine_mode mode = GET_MODE (operands[0]);
32b5b1aa 8863
36583fea
JH
8864 /* When the compare code is not LTU or GEU, we can not use sbbl case.
8865 In case comparsion is done with immediate, we can convert it to LTU or
8866 GEU by altering the integer. */
8867
8868 if ((code == LEU || code == GTU)
8869 && GET_CODE (ix86_compare_op1) == CONST_INT
635559ab 8870 && mode != HImode
261376e7
RH
8871 && INTVAL (ix86_compare_op1) != -1
8872 /* For x86-64, the immediate field in the instruction is 32-bit
8873 signed, so we can't increment a DImode value above 0x7fffffff. */
74411039
JH
8874 && (!TARGET_64BIT
8875 || GET_MODE (ix86_compare_op0) != DImode
261376e7 8876 || INTVAL (ix86_compare_op1) != 0x7fffffff)
0f290768 8877 && GET_CODE (operands[2]) == CONST_INT
36583fea
JH
8878 && GET_CODE (operands[3]) == CONST_INT)
8879 {
8880 if (code == LEU)
8881 code = LTU;
8882 else
8883 code = GEU;
261376e7
RH
8884 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8885 GET_MODE (ix86_compare_op0));
36583fea 8886 }
3a3677ff 8887
e075ae69 8888 start_sequence ();
a1b8572c 8889 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
2f937369 8890 compare_seq = get_insns ();
e075ae69
RH
8891 end_sequence ();
8892
8893 compare_code = GET_CODE (compare_op);
8894
8895 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8896 HImode insns, we'd be swallowed in word prefix ops. */
8897
635559ab
JH
8898 if (mode != HImode
8899 && (mode != DImode || TARGET_64BIT)
0f290768 8900 && GET_CODE (operands[2]) == CONST_INT
e075ae69
RH
8901 && GET_CODE (operands[3]) == CONST_INT)
8902 {
8903 rtx out = operands[0];
8904 HOST_WIDE_INT ct = INTVAL (operands[2]);
8905 HOST_WIDE_INT cf = INTVAL (operands[3]);
8906 HOST_WIDE_INT diff;
8907
a1b8572c
JH
8908 if ((compare_code == LTU || compare_code == GEU)
8909 && !second_test && !bypass_test)
e075ae69 8910 {
e075ae69
RH
8911 /* Detect overlap between destination and compare sources. */
8912 rtx tmp = out;
8913
0f290768 8914 /* To simplify rest of code, restrict to the GEU case. */
36583fea
JH
8915 if (compare_code == LTU)
8916 {
8917 int tmp = ct;
8918 ct = cf;
8919 cf = tmp;
8920 compare_code = reverse_condition (compare_code);
8921 code = reverse_condition (code);
8922 }
8923 diff = ct - cf;
8924
e075ae69 8925 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
a500c31b 8926 || reg_overlap_mentioned_p (out, ix86_compare_op1))
635559ab 8927 tmp = gen_reg_rtx (mode);
e075ae69
RH
8928
8929 emit_insn (compare_seq);
635559ab 8930 if (mode == DImode)
14f73b5a
JH
8931 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8932 else
8933 emit_insn (gen_x86_movsicc_0_m1 (tmp));
e075ae69 8934
36583fea
JH
8935 if (diff == 1)
8936 {
8937 /*
8938 * cmpl op0,op1
8939 * sbbl dest,dest
8940 * [addl dest, ct]
8941 *
8942 * Size 5 - 8.
8943 */
8944 if (ct)
635559ab
JH
8945 tmp = expand_simple_binop (mode, PLUS,
8946 tmp, GEN_INT (ct),
8947 tmp, 1, OPTAB_DIRECT);
36583fea
JH
8948 }
8949 else if (cf == -1)
8950 {
8951 /*
8952 * cmpl op0,op1
8953 * sbbl dest,dest
8954 * orl $ct, dest
8955 *
8956 * Size 8.
8957 */
635559ab
JH
8958 tmp = expand_simple_binop (mode, IOR,
8959 tmp, GEN_INT (ct),
8960 tmp, 1, OPTAB_DIRECT);
36583fea
JH
8961 }
8962 else if (diff == -1 && ct)
8963 {
8964 /*
8965 * cmpl op0,op1
8966 * sbbl dest,dest
06ec023f 8967 * notl dest
36583fea
JH
8968 * [addl dest, cf]
8969 *
8970 * Size 8 - 11.
8971 */
635559ab
JH
8972 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8973 if (cf)
8974 tmp = expand_simple_binop (mode, PLUS,
8975 tmp, GEN_INT (cf),
8976 tmp, 1, OPTAB_DIRECT);
36583fea
JH
8977 }
8978 else
8979 {
8980 /*
8981 * cmpl op0,op1
8982 * sbbl dest,dest
06ec023f 8983 * [notl dest]
36583fea
JH
8984 * andl cf - ct, dest
8985 * [addl dest, ct]
8986 *
8987 * Size 8 - 11.
8988 */
06ec023f
RB
8989
8990 if (cf == 0)
8991 {
8992 cf = ct;
8993 ct = 0;
8994 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8995 }
8996
635559ab
JH
8997 tmp = expand_simple_binop (mode, AND,
8998 tmp,
d8bf17f9 8999 gen_int_mode (cf - ct, mode),
635559ab
JH
9000 tmp, 1, OPTAB_DIRECT);
9001 if (ct)
9002 tmp = expand_simple_binop (mode, PLUS,
9003 tmp, GEN_INT (ct),
9004 tmp, 1, OPTAB_DIRECT);
36583fea 9005 }
e075ae69
RH
9006
9007 if (tmp != out)
9008 emit_move_insn (out, tmp);
9009
9010 return 1; /* DONE */
9011 }
9012
9013 diff = ct - cf;
9014 if (diff < 0)
9015 {
9016 HOST_WIDE_INT tmp;
9017 tmp = ct, ct = cf, cf = tmp;
9018 diff = -diff;
734dba19
JH
9019 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9020 {
9021 /* We may be reversing unordered compare to normal compare, that
9022 is not valid in general (we may convert non-trapping condition
9023 to trapping one), however on i386 we currently emit all
9024 comparisons unordered. */
9025 compare_code = reverse_condition_maybe_unordered (compare_code);
9026 code = reverse_condition_maybe_unordered (code);
9027 }
9028 else
9029 {
9030 compare_code = reverse_condition (compare_code);
9031 code = reverse_condition (code);
9032 }
e075ae69 9033 }
0f2a3457
JJ
9034
9035 compare_code = NIL;
9036 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9037 && GET_CODE (ix86_compare_op1) == CONST_INT)
9038 {
9039 if (ix86_compare_op1 == const0_rtx
9040 && (code == LT || code == GE))
9041 compare_code = code;
9042 else if (ix86_compare_op1 == constm1_rtx)
9043 {
9044 if (code == LE)
9045 compare_code = LT;
9046 else if (code == GT)
9047 compare_code = GE;
9048 }
9049 }
9050
9051 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9052 if (compare_code != NIL
9053 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9054 && (cf == -1 || ct == -1))
9055 {
9056 /* If lea code below could be used, only optimize
9057 if it results in a 2 insn sequence. */
9058
9059 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9060 || diff == 3 || diff == 5 || diff == 9)
9061 || (compare_code == LT && ct == -1)
9062 || (compare_code == GE && cf == -1))
9063 {
9064 /*
9065 * notl op1 (if necessary)
9066 * sarl $31, op1
9067 * orl cf, op1
9068 */
9069 if (ct != -1)
9070 {
9071 cf = ct;
9072 ct = -1;
9073 code = reverse_condition (code);
9074 }
9075
9076 out = emit_store_flag (out, code, ix86_compare_op0,
9077 ix86_compare_op1, VOIDmode, 0, -1);
9078
9079 out = expand_simple_binop (mode, IOR,
9080 out, GEN_INT (cf),
9081 out, 1, OPTAB_DIRECT);
9082 if (out != operands[0])
9083 emit_move_insn (operands[0], out);
9084
9085 return 1; /* DONE */
9086 }
9087 }
9088
635559ab
JH
9089 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9090 || diff == 3 || diff == 5 || diff == 9)
9091 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
e075ae69
RH
9092 {
9093 /*
9094 * xorl dest,dest
9095 * cmpl op1,op2
9096 * setcc dest
9097 * lea cf(dest*(ct-cf)),dest
9098 *
9099 * Size 14.
9100 *
9101 * This also catches the degenerate setcc-only case.
9102 */
9103
9104 rtx tmp;
9105 int nops;
9106
9107 out = emit_store_flag (out, code, ix86_compare_op0,
9108 ix86_compare_op1, VOIDmode, 0, 1);
9109
9110 nops = 0;
97f51ac4
RB
9111 /* On x86_64 the lea instruction operates on Pmode, so we need
9112 to get arithmetics done in proper mode to match. */
e075ae69 9113 if (diff == 1)
14f73b5a 9114 tmp = out;
e075ae69
RH
9115 else
9116 {
885a70fd 9117 rtx out1;
14f73b5a 9118 out1 = out;
635559ab 9119 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
e075ae69
RH
9120 nops++;
9121 if (diff & 1)
9122 {
635559ab 9123 tmp = gen_rtx_PLUS (mode, tmp, out1);
e075ae69
RH
9124 nops++;
9125 }
9126 }
9127 if (cf != 0)
9128 {
635559ab 9129 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
e075ae69
RH
9130 nops++;
9131 }
885a70fd
JH
9132 if (tmp != out
9133 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
e075ae69 9134 {
14f73b5a 9135 if (nops == 1)
e075ae69
RH
9136 {
9137 rtx clob;
9138
9139 clob = gen_rtx_REG (CCmode, FLAGS_REG);
9140 clob = gen_rtx_CLOBBER (VOIDmode, clob);
9141
9142 tmp = gen_rtx_SET (VOIDmode, out, tmp);
9143 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9144 emit_insn (tmp);
9145 }
9146 else
9147 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
9148 }
9149 if (out != operands[0])
1985ef90 9150 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
9151
9152 return 1; /* DONE */
9153 }
9154
9155 /*
9156 * General case: Jumpful:
9157 * xorl dest,dest cmpl op1, op2
9158 * cmpl op1, op2 movl ct, dest
9159 * setcc dest jcc 1f
9160 * decl dest movl cf, dest
9161 * andl (cf-ct),dest 1:
9162 * addl ct,dest
0f290768 9163 *
e075ae69
RH
9164 * Size 20. Size 14.
9165 *
9166 * This is reasonably steep, but branch mispredict costs are
9167 * high on modern cpus, so consider failing only if optimizing
9168 * for space.
9169 *
9170 * %%% Parameterize branch_cost on the tuning architecture, then
9171 * use that. The 80386 couldn't care less about mispredicts.
9172 */
9173
9174 if (!optimize_size && !TARGET_CMOVE)
9175 {
97f51ac4 9176 if (cf == 0)
e075ae69 9177 {
97f51ac4
RB
9178 cf = ct;
9179 ct = 0;
734dba19 9180 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
0f2a3457
JJ
9181 /* We may be reversing unordered compare to normal compare,
9182 that is not valid in general (we may convert non-trapping
9183 condition to trapping one), however on i386 we currently
9184 emit all comparisons unordered. */
9185 code = reverse_condition_maybe_unordered (code);
9186 else
9187 {
9188 code = reverse_condition (code);
9189 if (compare_code != NIL)
9190 compare_code = reverse_condition (compare_code);
9191 }
9192 }
9193
9194 if (compare_code != NIL)
9195 {
9196 /* notl op1 (if needed)
9197 sarl $31, op1
9198 andl (cf-ct), op1
9199 addl ct, op1
9200
9201 For x < 0 (resp. x <= -1) there will be no notl,
9202 so if possible swap the constants to get rid of the
9203 complement.
9204 True/false will be -1/0 while code below (store flag
9205 followed by decrement) is 0/-1, so the constants need
9206 to be exchanged once more. */
9207
9208 if (compare_code == GE || !cf)
734dba19 9209 {
0f2a3457
JJ
9210 code = reverse_condition (code);
9211 compare_code = LT;
734dba19
JH
9212 }
9213 else
9214 {
0f2a3457
JJ
9215 HOST_WIDE_INT tmp = cf;
9216 cf = ct;
9217 ct = tmp;
734dba19 9218 }
0f2a3457
JJ
9219
9220 out = emit_store_flag (out, code, ix86_compare_op0,
9221 ix86_compare_op1, VOIDmode, 0, -1);
e075ae69 9222 }
0f2a3457
JJ
9223 else
9224 {
9225 out = emit_store_flag (out, code, ix86_compare_op0,
9226 ix86_compare_op1, VOIDmode, 0, 1);
e075ae69 9227
97f51ac4 9228 out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
0f2a3457
JJ
9229 out, 1, OPTAB_DIRECT);
9230 }
e075ae69 9231
97f51ac4 9232 out = expand_simple_binop (mode, AND, out,
d8bf17f9 9233 gen_int_mode (cf - ct, mode),
635559ab 9234 out, 1, OPTAB_DIRECT);
97f51ac4
RB
9235 if (ct)
9236 out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
9237 out, 1, OPTAB_DIRECT);
e075ae69
RH
9238 if (out != operands[0])
9239 emit_move_insn (operands[0], out);
9240
9241 return 1; /* DONE */
9242 }
9243 }
9244
9245 if (!TARGET_CMOVE)
9246 {
9247 /* Try a few things more with specific constants and a variable. */
9248
78a0d70c 9249 optab op;
e075ae69
RH
9250 rtx var, orig_out, out, tmp;
9251
9252 if (optimize_size)
9253 return 0; /* FAIL */
9254
0f290768 9255 /* If one of the two operands is an interesting constant, load a
e075ae69 9256 constant with the above and mask it in with a logical operation. */
0f290768 9257
e075ae69
RH
9258 if (GET_CODE (operands[2]) == CONST_INT)
9259 {
9260 var = operands[3];
9261 if (INTVAL (operands[2]) == 0)
9262 operands[3] = constm1_rtx, op = and_optab;
9263 else if (INTVAL (operands[2]) == -1)
9264 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
9265 else
9266 return 0; /* FAIL */
e075ae69
RH
9267 }
9268 else if (GET_CODE (operands[3]) == CONST_INT)
9269 {
9270 var = operands[2];
9271 if (INTVAL (operands[3]) == 0)
9272 operands[2] = constm1_rtx, op = and_optab;
9273 else if (INTVAL (operands[3]) == -1)
9274 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
9275 else
9276 return 0; /* FAIL */
e075ae69 9277 }
78a0d70c 9278 else
e075ae69
RH
9279 return 0; /* FAIL */
9280
9281 orig_out = operands[0];
635559ab 9282 tmp = gen_reg_rtx (mode);
e075ae69
RH
9283 operands[0] = tmp;
9284
9285 /* Recurse to get the constant loaded. */
9286 if (ix86_expand_int_movcc (operands) == 0)
9287 return 0; /* FAIL */
9288
9289 /* Mask in the interesting variable. */
635559ab 9290 out = expand_binop (mode, op, var, tmp, orig_out, 0,
e075ae69
RH
9291 OPTAB_WIDEN);
9292 if (out != orig_out)
9293 emit_move_insn (orig_out, out);
9294
9295 return 1; /* DONE */
9296 }
9297
9298 /*
9299 * For comparison with above,
9300 *
9301 * movl cf,dest
9302 * movl ct,tmp
9303 * cmpl op1,op2
9304 * cmovcc tmp,dest
9305 *
9306 * Size 15.
9307 */
9308
635559ab
JH
9309 if (! nonimmediate_operand (operands[2], mode))
9310 operands[2] = force_reg (mode, operands[2]);
9311 if (! nonimmediate_operand (operands[3], mode))
9312 operands[3] = force_reg (mode, operands[3]);
e075ae69 9313
a1b8572c
JH
9314 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9315 {
635559ab 9316 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
9317 emit_move_insn (tmp, operands[3]);
9318 operands[3] = tmp;
9319 }
9320 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9321 {
635559ab 9322 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
9323 emit_move_insn (tmp, operands[2]);
9324 operands[2] = tmp;
9325 }
c9682caf
JH
9326 if (! register_operand (operands[2], VOIDmode)
9327 && ! register_operand (operands[3], VOIDmode))
635559ab 9328 operands[2] = force_reg (mode, operands[2]);
a1b8572c 9329
e075ae69
RH
9330 emit_insn (compare_seq);
9331 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 9332 gen_rtx_IF_THEN_ELSE (mode,
e075ae69
RH
9333 compare_op, operands[2],
9334 operands[3])));
a1b8572c
JH
9335 if (bypass_test)
9336 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 9337 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c
JH
9338 bypass_test,
9339 operands[3],
9340 operands[0])));
9341 if (second_test)
9342 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 9343 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c
JH
9344 second_test,
9345 operands[2],
9346 operands[0])));
e075ae69
RH
9347
9348 return 1; /* DONE */
e9a25f70 9349}
e075ae69 9350
/* Expand a floating point conditional move
     operands[0] = <comparison operands[1]> ? operands[2] : operands[3]
   where the values being compared are taken from the globals
   ix86_compare_op0 and ix86_compare_op1.  Depending on the target flags
   and modes tested below this emits an SSE min/max insn, an SSE
   conditional move pattern, or IF_THEN_ELSE sets.  operands[] and
   ix86_compare_op0/1 may be modified.  Every path returns 1.  */
32b5b1aa 9351int
e075ae69
RH
9352ix86_expand_fp_movcc (operands)
9353 rtx operands[];
32b5b1aa 9354{
e075ae69 9355 enum rtx_code code;
e075ae69 9356 rtx tmp;
a1b8572c 9357 rtx compare_op, second_test, bypass_test;
32b5b1aa 9358
0073023d
JH
9359 /* For SF/DFmode conditional moves based on comparisons
9360 in same mode, we may want to use SSE min/max instructions. */
965f5423
JH
9361 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9362 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
0073023d 9363 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
fa9f36a1
JH
9364 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9365 && (!TARGET_IEEE_FP
9366 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
0073023d
JH
9367 /* We may be called from the post-reload splitter. */
9368 && (!REG_P (operands[0])
9369 || SSE_REG_P (operands[0])
52a661a6 9370 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
0073023d
JH
9371 {
9372 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9373 code = GET_CODE (operands[1]);
9374
9375 /* See if we have (cross) match between comparison operands and
9376 conditional move operands. */
9377 if (rtx_equal_p (operands[2], op1))
9378 {
9379 rtx tmp = op0;
9380 op0 = op1;
9381 op1 = tmp;
9382 code = reverse_condition_maybe_unordered (code);
9383 }
9384 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9385 {
9386 /* Check for min operation. */
9387 if (code == LT)
9388 {
9389 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9390 if (memory_operand (op0, VOIDmode))
9391 op0 = force_reg (GET_MODE (operands[0]), op0);
9392 if (GET_MODE (operands[0]) == SFmode)
9393 emit_insn (gen_minsf3 (operands[0], op0, op1));
9394 else
9395 emit_insn (gen_mindf3 (operands[0], op0, op1));
9396 return 1;
9397 }
9398 /* Check for max operation. */
9399 if (code == GT)
9400 {
9401 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9402 if (memory_operand (op0, VOIDmode))
9403 op0 = force_reg (GET_MODE (operands[0]), op0);
9404 if (GET_MODE (operands[0]) == SFmode)
9405 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9406 else
9407 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9408 return 1;
9409 }
9410 }
9411 /* Manage condition to be sse_comparison_operator. In case we are
9412 in non-ieee mode, try to canonicalize the destination operand
9413 to be first in the comparison - this helps reload to avoid extra
9414 moves. */
9415 if (!sse_comparison_operator (operands[1], VOIDmode)
9416 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9417 {
/* Exchange the global comparison operands and rebuild operands[1]
   with the swapped condition code.  */
9418 rtx tmp = ix86_compare_op0;
9419 ix86_compare_op0 = ix86_compare_op1;
9420 ix86_compare_op1 = tmp;
9421 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9422 VOIDmode, ix86_compare_op0,
9423 ix86_compare_op1);
9424 }
9425 /* Similarly try to manage result to be first operand of conditional
fa9f36a1
JH
9426 move. We also don't support the NE comparison on SSE, so try to
9427 avoid it. */
037f20f1
JH
9428 if ((rtx_equal_p (operands[0], operands[3])
9429 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9430 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
0073023d
JH
9431 {
/* Exchange the two arms and rebuild operands[1] with the reversed
   condition code.  */
9432 rtx tmp = operands[2];
9433 operands[2] = operands[3];
92d0fb09 9434 operands[3] = tmp;
0073023d
JH
9435 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9436 (GET_CODE (operands[1])),
9437 VOIDmode, ix86_compare_op0,
9438 ix86_compare_op1);
9439 }
9440 if (GET_MODE (operands[0]) == SFmode)
9441 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9442 operands[2], operands[3],
9443 ix86_compare_op0, ix86_compare_op1));
9444 else
9445 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9446 operands[2], operands[3],
9447 ix86_compare_op0, ix86_compare_op1));
9448 return 1;
9449 }
9450
e075ae69 9451 /* The floating point conditional move instructions don't directly
0f290768 9452 support conditions resulting from a signed integer comparison. */
32b5b1aa 9453
e075ae69 9454 code = GET_CODE (operands[1]);
a1b8572c 9455 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9e7adcb3
JH
9456
9457 /* The floating point conditional move instructions don't directly
9458 support signed integer comparisons. */
9459
a1b8572c 9460 if (!fcmov_comparison_operator (compare_op, VOIDmode))
e075ae69 9461 {
a1b8572c 9462 if (second_test != NULL || bypass_test != NULL)
b531087a 9463 abort ();
/* Compute the condition into a fresh QImode register with setcc and
   redo the comparison as "that register != 0".  */
e075ae69 9464 tmp = gen_reg_rtx (QImode);
3a3677ff 9465 ix86_expand_setcc (code, tmp);
e075ae69
RH
9466 code = NE;
9467 ix86_compare_op0 = tmp;
9468 ix86_compare_op1 = const0_rtx;
a1b8572c
JH
9469 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9470 }
/* The additional conditional moves emitted at the end read operands[3]
   (bypass) or operands[2] (second test) after operands[0] has been
   written, so copy an overlapping arm to a fresh register first.  */
9471 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9472 {
9473 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9474 emit_move_insn (tmp, operands[3]);
9475 operands[3] = tmp;
9476 }
9477 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9478 {
9479 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9480 emit_move_insn (tmp, operands[2]);
9481 operands[2] = tmp;
e075ae69 9482 }
e9a25f70 9483
e075ae69
RH
9484 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9485 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9e7adcb3 9486 compare_op,
e075ae69
RH
9487 operands[2],
9488 operands[3])));
a1b8572c
JH
9489 if (bypass_test)
9490 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9491 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9492 bypass_test,
9493 operands[3],
9494 operands[0])));
9495 if (second_test)
9496 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9497 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9498 second_test,
9499 operands[2],
9500 operands[0])));
32b5b1aa 9501
e075ae69 9502 return 1;
32b5b1aa
SC
9503}
9504
2450a057
JH
9505/* Split OPERAND, a value of mode MODE, into parts and store them in
9506 PARTS[0..]. Similar to split_di, but works for floating point
9507 operands and non-offsettable memories. For pushes all parts are the
9508 push operand itself; the values will be saved in the right order.
   On 32-bit targets every part is SImode; on 64-bit targets an
   XFmode/TFmode value is split into a DImode and an SImode part.
   Returns the number of parts, which is 2 or 3 (anything else
   aborts). */
9509
2b589241 9510static int
2450a057
JH
9511ix86_split_to_parts (operand, parts, mode)
9512 rtx operand;
9513 rtx *parts;
9514 enum machine_mode mode;
32b5b1aa 9515{
26e5b205
JH
9516 int size;
9517
/* Number of parts: 4-byte units on 32-bit targets (TFmode is fixed at
   three), 8-byte units with the size rounded by 4 on 64-bit targets.  */
9518 if (!TARGET_64BIT)
9519 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9520 else
9521 size = (GET_MODE_SIZE (mode) + 4) / 8;
2450a057 9522
a7180f70
BS
9523 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9524 abort ();
2450a057
JH
9525 if (size < 2 || size > 3)
9526 abort ();
9527
f996902d
RH
9528 /* Optimize constant pool reference to immediates. This is used by fp
9529 moves, that force all constants to memory to allow combining. */
9530 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9531 {
9532 rtx tmp = maybe_get_pool_constant (operand);
9533 if (tmp)
9534 operand = tmp;
9535 }
d7a29404 9536
2450a057 9537 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
e075ae69 9538 {
2450a057
JH
9539 /* The only non-offsettable memories we handle are pushes. */
9540 if (! push_operand (operand, VOIDmode))
9541 abort ();
9542
/* Every part is the same push operand, copied and retyped to Pmode.  */
26e5b205
JH
9543 operand = copy_rtx (operand);
9544 PUT_MODE (operand, Pmode);
2450a057
JH
9545 parts[0] = parts[1] = parts[2] = operand;
9546 }
26e5b205 9547 else if (!TARGET_64BIT)
2450a057
JH
9548 {
9549 if (mode == DImode)
9550 split_di (&operand, 1, &parts[0], &parts[1]);
9551 else
e075ae69 9552 {
2450a057
JH
9553 if (REG_P (operand))
9554 {
/* Splitting a register into consecutive hard registers is only done
   after reload.  */
9555 if (!reload_completed)
9556 abort ();
9557 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9558 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9559 if (size == 3)
9560 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9561 }
9562 else if (offsettable_memref_p (operand))
9563 {
f4ef873c 9564 operand = adjust_address (operand, SImode, 0);
2450a057 9565 parts[0] = operand;
b72f00af 9566 parts[1] = adjust_address (operand, SImode, 4);
2450a057 9567 if (size == 3)
b72f00af 9568 parts[2] = adjust_address (operand, SImode, 8);
2450a057
JH
9569 }
9570 else if (GET_CODE (operand) == CONST_DOUBLE)
9571 {
9572 REAL_VALUE_TYPE r;
2b589241 9573 long l[4];
2450a057
JH
9574
9575 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
/* XFmode/TFmode set parts[2] here; parts[1] and parts[0] are set after
   the switch for every mode.  */
9576 switch (mode)
9577 {
9578 case XFmode:
2b589241 9579 case TFmode:
2450a057 9580 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
d8bf17f9 9581 parts[2] = gen_int_mode (l[2], SImode);
2450a057
JH
9582 break;
9583 case DFmode:
9584 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9585 break;
9586 default:
9587 abort ();
9588 }
d8bf17f9
LB
9589 parts[1] = gen_int_mode (l[1], SImode);
9590 parts[0] = gen_int_mode (l[0], SImode);
2450a057
JH
9591 }
9592 else
9593 abort ();
e075ae69 9594 }
2450a057 9595 }
26e5b205
JH
9596 else
9597 {
/* 64-bit target: only TImode and XFmode/TFmode are split here; for any
   other mode that reaches this branch PARTS is left untouched.  */
44cf5b6a
JH
9598 if (mode == TImode)
9599 split_ti (&operand, 1, &parts[0], &parts[1]);
26e5b205
JH
9600 if (mode == XFmode || mode == TFmode)
9601 {
9602 if (REG_P (operand))
9603 {
9604 if (!reload_completed)
9605 abort ();
9606 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9607 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9608 }
9609 else if (offsettable_memref_p (operand))
9610 {
b72f00af 9611 operand = adjust_address (operand, DImode, 0);
26e5b205 9612 parts[0] = operand;
b72f00af 9613 parts[1] = adjust_address (operand, SImode, 8);
26e5b205
JH
9614 }
9615 else if (GET_CODE (operand) == CONST_DOUBLE)
9616 {
9617 REAL_VALUE_TYPE r;
9618 long l[3];
9619
9620 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9621 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9622 /* Do not use shift by 32 to avoid warning on 32bit systems. */
/* parts[0] combines l[0] (low 32 bits) and l[1] (high 32 bits) into one
   DImode constant.  */
9623 if (HOST_BITS_PER_WIDE_INT >= 64)
523fbd9d 9624 parts[0]
d8bf17f9 9625 = gen_int_mode
44cf5b6a 9626 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
b531087a 9627 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
d8bf17f9 9628 DImode);
26e5b205
JH
9629 else
9630 parts[0] = immed_double_const (l[0], l[1], DImode);
d8bf17f9 9631 parts[1] = gen_int_mode (l[2], SImode);
26e5b205
JH
9632 }
9633 else
9634 abort ();
9635 }
9636 }
2450a057 9637
2b589241 9638 return size;
2450a057
JH
9639}
9640
9641/* Emit insns to perform a move or push of DI, DF, and XF values from
9642 operands[1] to operands[0] by splitting it into parts. The function
9643 returns nothing; all required insns are emitted here. On the
9644 non-push path operands[2..4] are overwritten with the destination
   parts and operands[5..7] with the matching source parts, in the
   order in which the part moves are emitted. */
9645
26e5b205
JH
9646void
9647ix86_split_long_move (operands)
9648 rtx operands[];
2450a057
JH
9649{
/* part[0][] holds the destination parts, part[1][] the source parts.  */
9650 rtx part[2][3];
26e5b205 9651 int nparts;
2450a057
JH
9652 int push = 0;
9653 int collisions = 0;
26e5b205
JH
9654 enum machine_mode mode = GET_MODE (operands[0]);
9655
9656 /* The DFmode expanders may ask us to move double.
9657 For 64bit target this is single move. By hiding the fact
9658 here we simplify i386.md splitters. */
9659 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9660 {
8cdfa312
RH
9661 /* Optimize constant pool reference to immediates. This is used by
9662 fp moves, that force all constants to memory to allow combining. */
26e5b205
JH
9663
9664 if (GET_CODE (operands[1]) == MEM
9665 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9666 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9667 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9668 if (push_operand (operands[0], VOIDmode))
b47b4f21
JH
9669 {
9670 operands[0] = copy_rtx (operands[0]);
9671 PUT_MODE (operands[0], Pmode);
9672 }
26e5b205
JH
9673 else
9674 operands[0] = gen_lowpart (DImode, operands[0]);
9675 operands[1] = gen_lowpart (DImode, operands[1]);
9676 emit_move_insn (operands[0], operands[1]);
9677 return;
9678 }
2450a057 9679
2450a057
JH
9680 /* The only non-offsettable memory we handle is push. */
9681 if (push_operand (operands[0], VOIDmode))
9682 push = 1;
9683 else if (GET_CODE (operands[0]) == MEM
9684 && ! offsettable_memref_p (operands[0]))
9685 abort ();
9686
26e5b205
JH
9687 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9688 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
2450a057
JH
9689
9690 /* When emitting push, take care for source operands on the stack. */
9691 if (push && GET_CODE (operands[1]) == MEM
9692 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9693 {
/* Give each lower source part the address of the part above it.  */
26e5b205 9694 if (nparts == 3)
886cbb88
JH
9695 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9696 XEXP (part[1][2], 0));
9697 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9698 XEXP (part[1][1], 0));
2450a057
JH
9699 }
9700
0f290768 9701 /* We need to do copy in the right order in case an address register
2450a057
JH
9702 of the source overlaps the destination. */
9703 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9704 {
/* Count the destination parts that are mentioned in the source
   address.  */
9705 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9706 collisions++;
9707 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9708 collisions++;
26e5b205 9709 if (nparts == 3
2450a057
JH
9710 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9711 collisions++;
9712
9713 /* Collision in the middle part can be handled by reordering. */
26e5b205 9714 if (collisions == 1 && nparts == 3
2450a057 9715 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
e075ae69 9716 {
2450a057
JH
9717 rtx tmp;
9718 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9719 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9720 }
e075ae69 9721
2450a057
JH
9722 /* If there are more collisions, we can't handle it by reordering.
9723 Do an lea to the last part and use only one colliding move. */
9724 else if (collisions > 1)
9725 {
9726 collisions = 1;
26e5b205 9727 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
2450a057 9728 XEXP (part[1][0], 0)));
26e5b205
JH
9729 part[1][0] = change_address (part[1][0],
9730 TARGET_64BIT ? DImode : SImode,
9731 part[0][nparts - 1]);
b72f00af 9732 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
26e5b205 9733 if (nparts == 3)
b72f00af 9734 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
2450a057
JH
9735 }
9736 }
9737
9738 if (push)
9739 {
/* Pushes are emitted from the highest part down to part 0.  */
26e5b205 9740 if (!TARGET_64BIT)
2b589241 9741 {
26e5b205
JH
9742 if (nparts == 3)
9743 {
9744 /* We use only first 12 bytes of TFmode value, but for pushing we
9745 are required to adjust stack as if we were pushing real 16byte
9746 value. */
9747 if (mode == TFmode && !TARGET_64BIT)
9748 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
9749 GEN_INT (-4)));
9750 emit_move_insn (part[0][2], part[1][2]);
9751 }
2b589241 9752 }
26e5b205
JH
9753 else
9754 {
9755 /* In 64bit mode we don't have 32bit push available. In case this is
9756 register, it is OK - we will just use larger counterpart. We also
9757 retype memory - these comes from attempt to avoid REX prefix on
9758 moving of second half of TFmode value. */
9759 if (GET_MODE (part[1][1]) == SImode)
9760 {
9761 if (GET_CODE (part[1][1]) == MEM)
f4ef873c 9762 part[1][1] = adjust_address (part[1][1], DImode, 0);
26e5b205
JH
9763 else if (REG_P (part[1][1]))
9764 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9765 else
b531087a 9766 abort ();
886cbb88
JH
9767 if (GET_MODE (part[1][0]) == SImode)
9768 part[1][0] = part[1][1];
26e5b205
JH
9769 }
9770 }
9771 emit_move_insn (part[0][1], part[1][1]);
9772 emit_move_insn (part[0][0], part[1][0]);
9773 return;
2450a057
JH
9774 }
9775
9776 /* Choose correct order to not overwrite the source before it is copied. */
9777 if ((REG_P (part[0][0])
9778 && REG_P (part[1][1])
9779 && (REGNO (part[0][0]) == REGNO (part[1][1])
26e5b205 9780 || (nparts == 3
2450a057
JH
9781 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9782 || (collisions > 0
9783 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9784 {
/* Destination part 0 would clobber a source that is still needed:
   move the parts from the highest down to part 0.  */
26e5b205 9785 if (nparts == 3)
2450a057 9786 {
26e5b205
JH
9787 operands[2] = part[0][2];
9788 operands[3] = part[0][1];
9789 operands[4] = part[0][0];
9790 operands[5] = part[1][2];
9791 operands[6] = part[1][1];
9792 operands[7] = part[1][0];
2450a057
JH
9793 }
9794 else
9795 {
26e5b205
JH
9796 operands[2] = part[0][1];
9797 operands[3] = part[0][0];
9798 operands[5] = part[1][1];
9799 operands[6] = part[1][0];
2450a057
JH
9800 }
9801 }
9802 else
9803 {
/* No such hazard: move the parts from part 0 upwards.  */
26e5b205 9804 if (nparts == 3)
2450a057 9805 {
26e5b205
JH
9806 operands[2] = part[0][0];
9807 operands[3] = part[0][1];
9808 operands[4] = part[0][2];
9809 operands[5] = part[1][0];
9810 operands[6] = part[1][1];
9811 operands[7] = part[1][2];
2450a057
JH
9812 }
9813 else
9814 {
26e5b205
JH
9815 operands[2] = part[0][0];
9816 operands[3] = part[0][1];
9817 operands[5] = part[1][0];
9818 operands[6] = part[1][1];
e075ae69
RH
9819 }
9820 }
/* operands[5..7] are the sources, operands[2..4] the destinations.  */
26e5b205
JH
9821 emit_move_insn (operands[2], operands[5]);
9822 emit_move_insn (operands[3], operands[6]);
9823 if (nparts == 3)
9824 emit_move_insn (operands[4], operands[7]);
32b5b1aa 9825
26e5b205 9826 return;
32b5b1aa 9827}
32b5b1aa 9828
e075ae69
RH
9829void
9830ix86_split_ashldi (operands, scratch)
9831 rtx *operands, scratch;
32b5b1aa 9832{
e075ae69
RH
9833 rtx low[2], high[2];
9834 int count;
b985a30f 9835
e075ae69
RH
9836 if (GET_CODE (operands[2]) == CONST_INT)
9837 {
9838 split_di (operands, 2, low, high);
9839 count = INTVAL (operands[2]) & 63;
32b5b1aa 9840
e075ae69
RH
9841 if (count >= 32)
9842 {
9843 emit_move_insn (high[0], low[1]);
9844 emit_move_insn (low[0], const0_rtx);
b985a30f 9845
e075ae69
RH
9846 if (count > 32)
9847 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
9848 }
9849 else
9850 {
9851 if (!rtx_equal_p (operands[0], operands[1]))
9852 emit_move_insn (operands[0], operands[1]);
9853 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9854 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
9855 }
9856 }
9857 else
9858 {
9859 if (!rtx_equal_p (operands[0], operands[1]))
9860 emit_move_insn (operands[0], operands[1]);
b985a30f 9861
e075ae69 9862 split_di (operands, 1, low, high);
b985a30f 9863
e075ae69
RH
9864 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9865 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
32b5b1aa 9866
fe577e58 9867 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 9868 {
fe577e58 9869 if (! no_new_pseudos)
e075ae69
RH
9870 scratch = force_reg (SImode, const0_rtx);
9871 else
9872 emit_move_insn (scratch, const0_rtx);
9873
9874 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9875 scratch));
9876 }
9877 else
9878 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
9879 }
e9a25f70 9880}
32b5b1aa 9881
e075ae69
RH
9882void
9883ix86_split_ashrdi (operands, scratch)
9884 rtx *operands, scratch;
32b5b1aa 9885{
e075ae69
RH
9886 rtx low[2], high[2];
9887 int count;
32b5b1aa 9888
e075ae69
RH
9889 if (GET_CODE (operands[2]) == CONST_INT)
9890 {
9891 split_di (operands, 2, low, high);
9892 count = INTVAL (operands[2]) & 63;
32b5b1aa 9893
e075ae69
RH
9894 if (count >= 32)
9895 {
9896 emit_move_insn (low[0], high[1]);
32b5b1aa 9897
e075ae69
RH
9898 if (! reload_completed)
9899 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9900 else
9901 {
9902 emit_move_insn (high[0], low[0]);
9903 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9904 }
9905
9906 if (count > 32)
9907 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
9908 }
9909 else
9910 {
9911 if (!rtx_equal_p (operands[0], operands[1]))
9912 emit_move_insn (operands[0], operands[1]);
9913 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9914 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
9915 }
9916 }
9917 else
32b5b1aa 9918 {
e075ae69
RH
9919 if (!rtx_equal_p (operands[0], operands[1]))
9920 emit_move_insn (operands[0], operands[1]);
9921
9922 split_di (operands, 1, low, high);
9923
9924 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9925 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9926
fe577e58 9927 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 9928 {
fe577e58 9929 if (! no_new_pseudos)
e075ae69
RH
9930 scratch = gen_reg_rtx (SImode);
9931 emit_move_insn (scratch, high[0]);
9932 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9933 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9934 scratch));
9935 }
9936 else
9937 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 9938 }
e075ae69 9939}
32b5b1aa 9940
e075ae69
RH
9941void
9942ix86_split_lshrdi (operands, scratch)
9943 rtx *operands, scratch;
9944{
9945 rtx low[2], high[2];
9946 int count;
32b5b1aa 9947
e075ae69 9948 if (GET_CODE (operands[2]) == CONST_INT)
32b5b1aa 9949 {
e075ae69
RH
9950 split_di (operands, 2, low, high);
9951 count = INTVAL (operands[2]) & 63;
9952
9953 if (count >= 32)
c7271385 9954 {
e075ae69
RH
9955 emit_move_insn (low[0], high[1]);
9956 emit_move_insn (high[0], const0_rtx);
32b5b1aa 9957
e075ae69
RH
9958 if (count > 32)
9959 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
9960 }
9961 else
9962 {
9963 if (!rtx_equal_p (operands[0], operands[1]))
9964 emit_move_insn (operands[0], operands[1]);
9965 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9966 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
9967 }
32b5b1aa 9968 }
e075ae69
RH
9969 else
9970 {
9971 if (!rtx_equal_p (operands[0], operands[1]))
9972 emit_move_insn (operands[0], operands[1]);
32b5b1aa 9973
e075ae69
RH
9974 split_di (operands, 1, low, high);
9975
9976 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9977 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
9978
9979 /* Heh. By reversing the arguments, we can reuse this pattern. */
fe577e58 9980 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 9981 {
fe577e58 9982 if (! no_new_pseudos)
e075ae69
RH
9983 scratch = force_reg (SImode, const0_rtx);
9984 else
9985 emit_move_insn (scratch, const0_rtx);
9986
9987 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9988 scratch));
9989 }
9990 else
9991 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
9992 }
32b5b1aa 9993}
3f803cd9 9994
0407c02b 9995/* Helper function for the string operations below. Dest VARIABLE whether
0945b39d
JH
9996 it is aligned to VALUE bytes. If true, jump to the label. */
9997static rtx
9998ix86_expand_aligntest (variable, value)
9999 rtx variable;
10000 int value;
10001{
10002 rtx label = gen_label_rtx ();
10003 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10004 if (GET_MODE (variable) == DImode)
10005 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10006 else
10007 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10008 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
d43e0b7d 10009 1, label);
0945b39d
JH
10010 return label;
10011}
10012
10013/* Adjust COUNTER by the VALUE. */
10014static void
10015ix86_adjust_counter (countreg, value)
10016 rtx countreg;
10017 HOST_WIDE_INT value;
10018{
10019 if (GET_MODE (countreg) == DImode)
10020 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10021 else
10022 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10023}
10024
10025/* Zero extend possibly SImode EXP to Pmode register. */
d24b3457 10026rtx
0945b39d
JH
10027ix86_zero_extend_to_Pmode (exp)
10028 rtx exp;
10029{
10030 rtx r;
10031 if (GET_MODE (exp) == VOIDmode)
10032 return force_reg (Pmode, exp);
10033 if (GET_MODE (exp) == Pmode)
10034 return copy_to_mode_reg (Pmode, exp);
10035 r = gen_reg_rtx (Pmode);
10036 emit_insn (gen_zero_extendsidi2 (r, exp));
10037 return r;
10038}
10039
10040/* Expand string move (memcpy) operation. Use i386 string operations when
10041 profitable. expand_clrstr contains similar code.

 DST and SRC are the destination and source MEMs (only their addresses,
 XEXP (x, 0), are read here), COUNT_EXP is the byte count and ALIGN_EXP
 the known alignment. Returns 1 when inline code has been emitted and 0
 when nothing was emitted and the caller has to fall back to something
 else (the comment below says the library version). */
10042int
10043ix86_expand_movstr (dst, src, count_exp, align_exp)
10044 rtx dst, src, count_exp, align_exp;
10045{
10046 rtx srcreg, destreg, countreg;
10047 enum machine_mode counter_mode;
10048 HOST_WIDE_INT align = 0;
10049 unsigned HOST_WIDE_INT count = 0;
10050 rtx insns;
10051
 /* Everything is emitted into a private sequence so that it can be
 handed to ix86_set_move_mem_attrs at the end, or dropped when we
 return 0. */
10052 start_sequence ();
10053
 /* ALIGN stays 0 when the alignment is not a compile time constant. */
10054 if (GET_CODE (align_exp) == CONST_INT)
10055 align = INTVAL (align_exp);
10056
5519a4f9 10057 /* This simple hack avoids all inlining code and simplifies code below. */
0945b39d
JH
10058 if (!TARGET_ALIGN_STRINGOPS)
10059 align = 64;
10060
 /* COUNT stays 0 when the byte count is not a compile time constant;
 the code below uses count == 0 to mean "unknown". */
10061 if (GET_CODE (count_exp) == CONST_INT)
10062 count = INTVAL (count_exp);
10063
10064 /* Figure out proper mode for counter. For 32bits it is always SImode,
10065 for 64bits use SImode when possible, otherwise DImode.
10066 Set count to number of bytes copied when known at compile time. */
10067 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10068 || x86_64_zero_extended_value (count_exp))
10069 counter_mode = SImode;
10070 else
10071 counter_mode = DImode;
10072
 /* counter_mode has just been set to SImode or DImode, so this is a
 pure sanity check. */
10073 if (counter_mode != SImode && counter_mode != DImode)
10074 abort ();
10075
10076 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10077 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10078
10079 emit_insn (gen_cld ());
10080
10081 /* When optimizing for size emit simple rep ; movsb instruction for
10082 counts not divisible by 4. */
10083
10084 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10085 {
10086 countreg = ix86_zero_extend_to_Pmode (count_exp);
10087 if (TARGET_64BIT)
10088 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10089 destreg, srcreg, countreg));
10090 else
10091 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10092 destreg, srcreg, countreg));
10093 }
10094
10095 /* For constant aligned (or small unaligned) copies use rep movsl
10096 followed by code copying the rest. For PentiumPro ensure 8 byte
10097 alignment to allow rep movsl acceleration. */
10098
10099 else if (count != 0
10100 && (align >= 8
10101 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
b531087a 10102 || optimize_size || count < (unsigned int) 64))
0945b39d
JH
10103 {
 /* SIZE is the width in bytes of one element moved by the rep insn. */
10104 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
 /* Only emit the rep move when at least one whole element is copied. */
10105 if (count & ~(size - 1))
10106 {
10107 countreg = copy_to_mode_reg (counter_mode,
10108 GEN_INT ((count >> (size == 4 ? 2 : 3))
10109 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10110 countreg = ix86_zero_extend_to_Pmode (countreg);
10111 if (size == 4)
10112 {
10113 if (TARGET_64BIT)
10114 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10115 destreg, srcreg, countreg));
10116 else
10117 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10118 destreg, srcreg, countreg));
10119 }
10120 else
10121 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10122 destreg, srcreg, countreg));
10123 }
 /* Copy the remaining 4, 2 and 1 bytes selected by the low bits of the
 constant count. */
10124 if (size == 8 && (count & 0x04))
10125 emit_insn (gen_strmovsi (destreg, srcreg));
10126 if (count & 0x02)
10127 emit_insn (gen_strmovhi (destreg, srcreg));
10128 if (count & 0x01)
10129 emit_insn (gen_strmovqi (destreg, srcreg));
10130 }
10131 /* The generic code based on the glibc implementation:
10132 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10133 allowing accelerated copying there)
10134 - copy the data using rep movsl
10135 - copy the rest. */
10136 else
10137 {
10138 rtx countreg2;
10139 rtx label = NULL;
37ad04a5
JH
10140 int desired_alignment = (TARGET_PENTIUMPRO
10141 && (count == 0 || count >= (unsigned int) 260)
10142 ? 8 : UNITS_PER_WORD);
0945b39d
JH
10143
10144 /* In case we don't know anything about the alignment, default to
10145 library version, since it is usually equally fast and result in
10146 shorter code. */
10147 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10148 {
 /* Close the sequence opened at the top; its insns are not
 emitted. */
10149 end_sequence ();
10150 return 0;
10151 }
10152
 /* NOTE(review): a cld has already been emitted unconditionally above,
 so this path emits a second one (here or before the rep move
 below) -- confirm that this is intended. */
10153 if (TARGET_SINGLE_STRINGOP)
10154 emit_insn (gen_cld ());
10155
10156 countreg2 = gen_reg_rtx (Pmode);
10157 countreg = copy_to_mode_reg (counter_mode, count_exp);
10158
10159 /* We don't use loops to align destination and to copy parts smaller
10160 than 4 bytes, because gcc is able to optimize such code better (in
10161 the case the destination or the count really is aligned, gcc is often
10162 able to predict the branches) and also it is friendlier to the
a4f31c00 10163 hardware branch prediction.
0945b39d
JH
10164
10165 Using loops is beneficial for generic case, because we can
10166 handle small counts using the loops. Many CPUs (such as Athlon)
10167 have large REP prefix setup costs.
10168
10169 This is quite costly. Maybe we can revisit this decision later or
10170 add some customizability to this code. */
10171
 /* With an unknown count and a possibly misaligned destination, jump
 over the alignment prologue when the count is at most
 desired_alignment - 1 (unsigned compare). The target label is
 emitted further down, either just before or just after the rep
 move. */
37ad04a5 10172 if (count == 0 && align < desired_alignment)
0945b39d
JH
10173 {
10174 label = gen_label_rtx ();
aaae0bb9 10175 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
d43e0b7d 10176 LEU, 0, counter_mode, 1, label);
0945b39d
JH
10177 }
 /* Alignment prologue. Each step tests one bit of the destination
 address; ix86_expand_aligntest jumps to the label it returns when
 the bit is clear, otherwise 1, 2 or 4 bytes are copied and the
 counter is reduced accordingly. */
10178 if (align <= 1)
10179 {
10180 rtx label = ix86_expand_aligntest (destreg, 1);
10181 emit_insn (gen_strmovqi (destreg, srcreg));
10182 ix86_adjust_counter (countreg, 1);
10183 emit_label (label);
10184 LABEL_NUSES (label) = 1;
10185 }
10186 if (align <= 2)
10187 {
10188 rtx label = ix86_expand_aligntest (destreg, 2);
10189 emit_insn (gen_strmovhi (destreg, srcreg));
10190 ix86_adjust_counter (countreg, 2);
10191 emit_label (label);
10192 LABEL_NUSES (label) = 1;
10193 }
37ad04a5 10194 if (align <= 4 && desired_alignment > 4)
0945b39d
JH
10195 {
10196 rtx label = ix86_expand_aligntest (destreg, 4);
10197 emit_insn (gen_strmovsi (destreg, srcreg));
10198 ix86_adjust_counter (countreg, 4);
10199 emit_label (label);
10200 LABEL_NUSES (label) = 1;
10201 }
10202
 /* On 32-bit targets with desired_alignment > 4 the small-count branch
 lands here, before the rep move; in all other cases it lands after
 the rep move (see below). */
37ad04a5
JH
10203 if (label && desired_alignment > 4 && !TARGET_64BIT)
10204 {
10205 emit_label (label);
10206 LABEL_NUSES (label) = 1;
10207 label = NULL_RTX;
10208 }
0945b39d
JH
10209 if (!TARGET_SINGLE_STRINGOP)
10210 emit_insn (gen_cld ());
 /* Main copy: countreg2 = countreg / 8 (64-bit) or / 4 (32-bit)
 elements moved with a single rep instruction. */
10211 if (TARGET_64BIT)
10212 {
10213 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10214 GEN_INT (3)));
10215 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10216 destreg, srcreg, countreg2));
10217 }
10218 else
10219 {
10220 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10221 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10222 destreg, srcreg, countreg2));
10223 }
10224
10225 if (label)
10226 {
10227 emit_label (label);
10228 LABEL_NUSES (label) = 1;
10229 }
 /* Epilogue: copy the remaining 4 (64-bit only), 2 and 1 bytes. For
 each size, the first condition decides from the constant count at
 compile time; the second emits a run time test of the matching
 bit of countreg instead. */
10230 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10231 emit_insn (gen_strmovsi (destreg, srcreg));
10232 if ((align <= 4 || count == 0) && TARGET_64BIT)
10233 {
10234 rtx label = ix86_expand_aligntest (countreg, 4);
10235 emit_insn (gen_strmovsi (destreg, srcreg));
10236 emit_label (label);
10237 LABEL_NUSES (label) = 1;
10238 }
10239 if (align > 2 && count != 0 && (count & 2))
10240 emit_insn (gen_strmovhi (destreg, srcreg));
10241 if (align <= 2 || count == 0)
10242 {
10243 rtx label = ix86_expand_aligntest (countreg, 2);
10244 emit_insn (gen_strmovhi (destreg, srcreg));
10245 emit_label (label);
10246 LABEL_NUSES (label) = 1;
10247 }
10248 if (align > 1 && count != 0 && (count & 1))
10249 emit_insn (gen_strmovqi (destreg, srcreg));
10250 if (align <= 1 || count == 0)
10251 {
10252 rtx label = ix86_expand_aligntest (countreg, 1);
10253 emit_insn (gen_strmovqi (destreg, srcreg));
10254 emit_label (label);
10255 LABEL_NUSES (label) = 1;
10256 }
10257 }
10258
 /* Take the generated insns out of the private sequence, let
 ix86_set_move_mem_attrs process them, then emit them for real. */
10259 insns = get_insns ();
10260 end_sequence ();
10261
10262 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
2f937369 10263 emit_insn (insns);
0945b39d
JH
10264 return 1;
10265}
10266
10267/* Expand string clear operation (bzero). Use i386 string operations when
10268 profitable. expand_movstr contains similar code.

 SRC is the MEM to be cleared (despite its name it is the destination;
 only its address, XEXP (src, 0), is read), COUNT_EXP is the byte count
 and ALIGN_EXP the known alignment. Returns 1 when the inline expansion
 has been emitted and 0 when the caller has to fall back to something
 else (the comment below says the library version).

 NOTE(review): unlike ix86_expand_movstr this function does not wrap
 its output in start_sequence/end_sequence, so on the "return 0" path
 the address copy and the cld emitted before it stay in the insn
 stream -- confirm that this is harmless. */
10269int
10270ix86_expand_clrstr (src, count_exp, align_exp)
10271 rtx src, count_exp, align_exp;
10272{
10273 rtx destreg, zeroreg, countreg;
10274 enum machine_mode counter_mode;
10275 HOST_WIDE_INT align = 0;
10276 unsigned HOST_WIDE_INT count = 0;
10277
 /* ALIGN stays 0 when the alignment is not a compile time constant. */
10278 if (GET_CODE (align_exp) == CONST_INT)
10279 align = INTVAL (align_exp);
10280
5519a4f9 10281 /* This simple hack avoids all inlining code and simplifies code below. */
0945b39d
JH
10282 if (!TARGET_ALIGN_STRINGOPS)
10283 align = 32;
10284
 /* COUNT stays 0 when the byte count is not a compile time constant;
 the code below uses count == 0 to mean "unknown". */
10285 if (GET_CODE (count_exp) == CONST_INT)
10286 count = INTVAL (count_exp);
10287 /* Figure out proper mode for counter. For 32bits it is always SImode,
10288 for 64bits use SImode when possible, otherwise DImode.
10289 Set count to number of bytes cleared when known at compile time. */
10290 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10291 || x86_64_zero_extended_value (count_exp))
10292 counter_mode = SImode;
10293 else
10294 counter_mode = DImode;
10295
10296 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10297
10298 emit_insn (gen_cld ());
10299
10300 /* When optimizing for size emit simple rep ; stosb instruction for
10301 counts not divisible by 4. */
10302
10303 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10304 {
10305 countreg = ix86_zero_extend_to_Pmode (count_exp);
10306 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10307 if (TARGET_64BIT)
10308 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10309 destreg, countreg));
10310 else
10311 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10312 destreg, countreg));
10313 }
 /* Constant count that is either sufficiently aligned or small: one rep
 store of whole elements followed by stores for the remaining
 bytes. */
10314 else if (count != 0
10315 && (align >= 8
10316 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
b531087a 10317 || optimize_size || count < (unsigned int) 64))
0945b39d
JH
10318 {
 /* SIZE is the width in bytes of one element stored by the rep insn;
 zeroreg is created in the matching mode. */
10319 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10320 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
 /* Only emit the rep store when at least one whole element is
 cleared. */
10321 if (count & ~(size - 1))
10322 {
10323 countreg = copy_to_mode_reg (counter_mode,
10324 GEN_INT ((count >> (size == 4 ? 2 : 3))
10325 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10326 countreg = ix86_zero_extend_to_Pmode (countreg);
10327 if (size == 4)
10328 {
10329 if (TARGET_64BIT)
10330 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10331 destreg, countreg));
10332 else
10333 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10334 destreg, countreg));
10335 }
10336 else
10337 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10338 destreg, countreg));
10339 }
 /* Clear the remaining 4, 2 and 1 bytes selected by the low bits of
 the constant count, storing narrower subregs of zeroreg. */
10340 if (size == 8 && (count & 0x04))
10341 emit_insn (gen_strsetsi (destreg,
10342 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10343 if (count & 0x02)
10344 emit_insn (gen_strsethi (destreg,
10345 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10346 if (count & 0x01)
10347 emit_insn (gen_strsetqi (destreg,
10348 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10349 }
 /* Generic case: align the destination, clear the bulk with a rep
 store, then clear the remaining bytes. */
10350 else
10351 {
10352 rtx countreg2;
10353 rtx label = NULL;
37ad04a5
JH
10354 /* Compute desired alignment of the string operation. */
10355 int desired_alignment = (TARGET_PENTIUMPRO
10356 && (count == 0 || count >= (unsigned int) 260)
10357 ? 8 : UNITS_PER_WORD);
0945b39d
JH
10358
10359 /* In case we don't know anything about the alignment, default to
10360 library version, since it is usually equally fast and result in
10361 shorter code. */
10362 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10363 return 0;
10364
 /* NOTE(review): a cld has already been emitted unconditionally above,
 so this path emits a second one (here or before the rep store
 below) -- confirm that this is intended. */
10365 if (TARGET_SINGLE_STRINGOP)
10366 emit_insn (gen_cld ());
10367
10368 countreg2 = gen_reg_rtx (Pmode);
10369 countreg = copy_to_mode_reg (counter_mode, count_exp);
10370 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10371
 /* With an unknown count and a possibly misaligned destination, jump
 over the alignment prologue when the count is at most
 desired_alignment - 1 (unsigned compare). The target label is
 emitted further down, either just before or just after the rep
 store. */
37ad04a5 10372 if (count == 0 && align < desired_alignment)
0945b39d
JH
10373 {
10374 label = gen_label_rtx ();
37ad04a5 10375 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
d43e0b7d 10376 LEU, 0, counter_mode, 1, label);
0945b39d
JH
10377 }
 /* Alignment prologue. Each step tests one bit of the destination
 address; ix86_expand_aligntest jumps to the label it returns when
 the bit is clear, otherwise 1, 2 or 4 bytes are cleared and the
 counter is reduced accordingly. */
10378 if (align <= 1)
10379 {
10380 rtx label = ix86_expand_aligntest (destreg, 1);
10381 emit_insn (gen_strsetqi (destreg,
10382 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10383 ix86_adjust_counter (countreg, 1);
10384 emit_label (label);
10385 LABEL_NUSES (label) = 1;
10386 }
10387 if (align <= 2)
10388 {
10389 rtx label = ix86_expand_aligntest (destreg, 2);
10390 emit_insn (gen_strsethi (destreg,
10391 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10392 ix86_adjust_counter (countreg, 2);
10393 emit_label (label);
10394 LABEL_NUSES (label) = 1;
10395 }
37ad04a5 10396 if (align <= 4 && desired_alignment > 4)
0945b39d
JH
10397 {
10398 rtx label = ix86_expand_aligntest (destreg, 4);
 /* zeroreg is Pmode; on 64-bit targets an SImode subreg of it is
 stored, otherwise the register itself. */
10399 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10400 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10401 : zeroreg)));
10402 ix86_adjust_counter (countreg, 4);
10403 emit_label (label);
10404 LABEL_NUSES (label) = 1;
10405 }
10406
 /* On 32-bit targets with desired_alignment > 4 the small-count branch
 lands here, before the rep store; in all other cases it lands
 after the rep store (see below). */
37ad04a5
JH
10407 if (label && desired_alignment > 4 && !TARGET_64BIT)
10408 {
10409 emit_label (label);
10410 LABEL_NUSES (label) = 1;
10411 label = NULL_RTX;
10412 }
10413
0945b39d
JH
10414 if (!TARGET_SINGLE_STRINGOP)
10415 emit_insn (gen_cld ());
 /* Main clear: countreg2 = countreg / 8 (64-bit) or / 4 (32-bit)
 elements stored with a single rep instruction. */
10416 if (TARGET_64BIT)
10417 {
10418 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10419 GEN_INT (3)));
10420 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10421 destreg, countreg2));
10422 }
10423 else
10424 {
10425 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10426 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10427 destreg, countreg2));
10428 }
0945b39d
JH
10429 if (label)
10430 {
10431 emit_label (label);
10432 LABEL_NUSES (label) = 1;
10433 }
37ad04a5 10434
 /* Epilogue: clear the remaining 4 (64-bit only), 2 and 1 bytes. For
 each size, the first condition decides from the constant count at
 compile time; the second emits a run time test of the matching
 bit of countreg instead. */
0945b39d
JH
10435 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10436 emit_insn (gen_strsetsi (destreg,
10437 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10438 if (TARGET_64BIT && (align <= 4 || count == 0))
10439 {
79258dce 10440 rtx label = ix86_expand_aligntest (countreg, 4);
0945b39d
JH
10441 emit_insn (gen_strsetsi (destreg,
10442 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10443 emit_label (label);
10444 LABEL_NUSES (label) = 1;
10445 }
10446 if (align > 2 && count != 0 && (count & 2))
10447 emit_insn (gen_strsethi (destreg,
10448 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10449 if (align <= 2 || count == 0)
10450 {
74411039 10451 rtx label = ix86_expand_aligntest (countreg, 2);
0945b39d
JH
10452 emit_insn (gen_strsethi (destreg,
10453 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10454 emit_label (label);
10455 LABEL_NUSES (label) = 1;
10456 }
10457 if (align > 1 && count != 0 && (count & 1))
10458 emit_insn (gen_strsetqi (destreg,
10459 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10460 if (align <= 1 || count == 0)
10461 {
74411039 10462 rtx label = ix86_expand_aligntest (countreg, 1);
0945b39d
JH
10463 emit_insn (gen_strsetqi (destreg,
10464 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10465 emit_label (label);
10466 LABEL_NUSES (label) = 1;
10467 }
10468 }
10469 return 1;
10470}
10471/* Expand strlen. */
10472int
10473ix86_expand_strlen (out, src, eoschar, align)
10474 rtx out, src, eoschar, align;
10475{
10476 rtx addr, scratch1, scratch2, scratch3, scratch4;
10477
10478 /* The generic case of strlen expander is long. Avoid it's
10479 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
10480
10481 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10482 && !TARGET_INLINE_ALL_STRINGOPS
10483 && !optimize_size
10484 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10485 return 0;
10486
10487 addr = force_reg (Pmode, XEXP (src, 0));
10488 scratch1 = gen_reg_rtx (Pmode);
10489
10490 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10491 && !optimize_size)
10492 {
10493 /* Well it seems that some optimizer does not combine a call like
10494 foo(strlen(bar), strlen(bar));
10495 when the move and the subtraction is done here. It does calculate
10496 the length just once when these instructions are done inside of
10497 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10498 often used and I use one fewer register for the lifetime of
10499 output_strlen_unroll() this is better. */
10500
10501 emit_move_insn (out, addr);
10502
10503 ix86_expand_strlensi_unroll_1 (out, align);
10504
10505 /* strlensi_unroll_1 returns the address of the zero at the end of
10506 the string, like memchr(), so compute the length by subtracting
10507 the start address. */
10508 if (TARGET_64BIT)
10509 emit_insn (gen_subdi3 (out, out, addr));
10510 else
10511 emit_insn (gen_subsi3 (out, out, addr));
10512 }
10513 else
10514 {
10515 scratch2 = gen_reg_rtx (Pmode);
10516 scratch3 = gen_reg_rtx (Pmode);
10517 scratch4 = force_reg (Pmode, constm1_rtx);
10518
10519 emit_move_insn (scratch3, addr);
10520 eoschar = force_reg (QImode, eoschar);
10521
10522 emit_insn (gen_cld ());
10523 if (TARGET_64BIT)
10524 {
10525 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10526 align, scratch4, scratch3));
10527 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10528 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10529 }
10530 else
10531 {
10532 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10533 align, scratch4, scratch3));
10534 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10535 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10536 }
10537 }
10538 return 1;
10539}
10540
e075ae69
RH
10541/* Expand the appropriate insns for doing strlen if not just doing
10542 repnz; scasb
10543
10544 out = result, initialized with the start address
10545 align_rtx = alignment of the address.
10546 scratch = scratch register, initialized with the startaddress when
77ebd435 10547 not aligned, otherwise undefined
3f803cd9
SC
10548
10549 This is just the body. It needs the initialisations mentioned above and
10550 some address computing at the end. These things are done in i386.md. */
10551
0945b39d
JH
10552static void
10553ix86_expand_strlensi_unroll_1 (out, align_rtx)
10554 rtx out, align_rtx;
3f803cd9 10555{
e075ae69
RH
10556 int align;
10557 rtx tmp;
10558 rtx align_2_label = NULL_RTX;
10559 rtx align_3_label = NULL_RTX;
10560 rtx align_4_label = gen_label_rtx ();
10561 rtx end_0_label = gen_label_rtx ();
e075ae69 10562 rtx mem;
e2e52e1b 10563 rtx tmpreg = gen_reg_rtx (SImode);
0945b39d 10564 rtx scratch = gen_reg_rtx (SImode);
e075ae69
RH
10565
10566 align = 0;
10567 if (GET_CODE (align_rtx) == CONST_INT)
10568 align = INTVAL (align_rtx);
3f803cd9 10569
e9a25f70 10570 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
3f803cd9 10571
e9a25f70 10572 /* Is there a known alignment and is it less than 4? */
e075ae69 10573 if (align < 4)
3f803cd9 10574 {
0945b39d
JH
10575 rtx scratch1 = gen_reg_rtx (Pmode);
10576 emit_move_insn (scratch1, out);
e9a25f70 10577 /* Is there a known alignment and is it not 2? */
e075ae69 10578 if (align != 2)
3f803cd9 10579 {
e075ae69
RH
10580 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10581 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10582
10583 /* Leave just the 3 lower bits. */
0945b39d 10584 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
e075ae69
RH
10585 NULL_RTX, 0, OPTAB_WIDEN);
10586
9076b9c1 10587 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 10588 Pmode, 1, align_4_label);
9076b9c1 10589 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
d43e0b7d 10590 Pmode, 1, align_2_label);
9076b9c1 10591 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
d43e0b7d 10592 Pmode, 1, align_3_label);
3f803cd9
SC
10593 }
10594 else
10595 {
e9a25f70
JL
10596 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10597 check if is aligned to 4 - byte. */
e9a25f70 10598
0945b39d 10599 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
e075ae69
RH
10600 NULL_RTX, 0, OPTAB_WIDEN);
10601
9076b9c1 10602 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 10603 Pmode, 1, align_4_label);
3f803cd9
SC
10604 }
10605
e075ae69 10606 mem = gen_rtx_MEM (QImode, out);
e9a25f70 10607
e075ae69 10608 /* Now compare the bytes. */
e9a25f70 10609
0f290768 10610 /* Compare the first n unaligned byte on a byte per byte basis. */
9076b9c1 10611 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
d43e0b7d 10612 QImode, 1, end_0_label);
3f803cd9 10613
0f290768 10614 /* Increment the address. */
0945b39d
JH
10615 if (TARGET_64BIT)
10616 emit_insn (gen_adddi3 (out, out, const1_rtx));
10617 else
10618 emit_insn (gen_addsi3 (out, out, const1_rtx));
e9a25f70 10619
e075ae69
RH
10620 /* Not needed with an alignment of 2 */
10621 if (align != 2)
10622 {
10623 emit_label (align_2_label);
3f803cd9 10624
d43e0b7d
RK
10625 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10626 end_0_label);
e075ae69 10627
0945b39d
JH
10628 if (TARGET_64BIT)
10629 emit_insn (gen_adddi3 (out, out, const1_rtx));
10630 else
10631 emit_insn (gen_addsi3 (out, out, const1_rtx));
e075ae69
RH
10632
10633 emit_label (align_3_label);
10634 }
10635
d43e0b7d
RK
10636 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10637 end_0_label);
e075ae69 10638
0945b39d
JH
10639 if (TARGET_64BIT)
10640 emit_insn (gen_adddi3 (out, out, const1_rtx));
10641 else
10642 emit_insn (gen_addsi3 (out, out, const1_rtx));
3f803cd9
SC
10643 }
10644
e075ae69
RH
10645 /* Generate loop to check 4 bytes at a time. It is not a good idea to
10646 align this loop. It gives only huge programs, but does not help to
10647 speed up. */
10648 emit_label (align_4_label);
3f803cd9 10649
e075ae69
RH
10650 mem = gen_rtx_MEM (SImode, out);
10651 emit_move_insn (scratch, mem);
0945b39d
JH
10652 if (TARGET_64BIT)
10653 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10654 else
10655 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
e075ae69 10656
e2e52e1b
JH
10657 /* This formula yields a nonzero result iff one of the bytes is zero.
10658 This saves three branches inside loop and many cycles. */
10659
10660 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10661 emit_insn (gen_one_cmplsi2 (scratch, scratch));
10662 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
7471a1f0 10663 emit_insn (gen_andsi3 (tmpreg, tmpreg,
d8bf17f9 10664 gen_int_mode (0x80808080, SImode)));
d43e0b7d
RK
10665 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
10666 align_4_label);
e2e52e1b
JH
10667
10668 if (TARGET_CMOVE)
10669 {
10670 rtx reg = gen_reg_rtx (SImode);
0945b39d 10671 rtx reg2 = gen_reg_rtx (Pmode);
e2e52e1b
JH
10672 emit_move_insn (reg, tmpreg);
10673 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10674
0f290768 10675 /* If zero is not in the first two bytes, move two bytes forward. */
16189740 10676 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
10677 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10678 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10679 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10680 gen_rtx_IF_THEN_ELSE (SImode, tmp,
77ebd435
AJ
10681 reg,
10682 tmpreg)));
e2e52e1b 10683 /* Emit lea manually to avoid clobbering of flags. */
0945b39d
JH
10684 emit_insn (gen_rtx_SET (SImode, reg2,
10685 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
e2e52e1b
JH
10686
10687 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10688 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10689 emit_insn (gen_rtx_SET (VOIDmode, out,
0945b39d 10690 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
a4f31c00
AJ
10691 reg2,
10692 out)));
e2e52e1b
JH
10693
10694 }
10695 else
10696 {
10697 rtx end_2_label = gen_label_rtx ();
10698 /* Is zero in the first two bytes? */
10699
16189740 10700 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
10701 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10702 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10703 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10704 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10705 pc_rtx);
10706 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10707 JUMP_LABEL (tmp) = end_2_label;
10708
0f290768 10709 /* Not in the first two. Move two bytes forward. */
e2e52e1b 10710 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
0945b39d
JH
10711 if (TARGET_64BIT)
10712 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
10713 else
10714 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
e2e52e1b
JH
10715
10716 emit_label (end_2_label);
10717
10718 }
10719
0f290768 10720 /* Avoid branch in fixing the byte. */
e2e52e1b 10721 tmpreg = gen_lowpart (QImode, tmpreg);
7e08e190 10722 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
0945b39d
JH
10723 if (TARGET_64BIT)
10724 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
10725 else
10726 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
e075ae69
RH
10727
10728 emit_label (end_0_label);
10729}
0e07aff3
RH
10730
10731void
10732ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
10733 rtx retval, fnaddr, callarg1, callarg2, pop;
10734{
10735 rtx use = NULL, call;
10736
10737 if (pop == const0_rtx)
10738 pop = NULL;
10739 if (TARGET_64BIT && pop)
10740 abort ();
10741
b069de3b
SS
10742#if TARGET_MACHO
10743 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
10744 fnaddr = machopic_indirect_call_target (fnaddr);
10745#else
0e07aff3
RH
10746 /* Static functions and indirect calls don't need the pic register. */
10747 if (! TARGET_64BIT && flag_pic
10748 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10749 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
66edd3b4 10750 use_reg (&use, pic_offset_table_rtx);
0e07aff3
RH
10751
10752 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
10753 {
10754 rtx al = gen_rtx_REG (QImode, 0);
10755 emit_move_insn (al, callarg2);
10756 use_reg (&use, al);
10757 }
b069de3b 10758#endif /* TARGET_MACHO */
0e07aff3
RH
10759
10760 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
10761 {
10762 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10763 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10764 }
10765
10766 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
10767 if (retval)
10768 call = gen_rtx_SET (VOIDmode, retval, call);
10769 if (pop)
10770 {
10771 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
10772 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
10773 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
10774 }
10775
10776 call = emit_call_insn (call);
10777 if (use)
10778 CALL_INSN_FUNCTION_USAGE (call) = use;
10779}
fce5a9f2 10780
e075ae69 10781\f
e075ae69
RH
10782/* Clear stack slot assignments remembered from previous functions.
10783 This is called from INIT_EXPANDERS once before RTL is emitted for each
10784 function. */
10785
e2500fed
GK
10786static struct machine_function *
10787ix86_init_machine_status ()
37b15744 10788{
e2500fed 10789 return ggc_alloc_cleared (sizeof (struct machine_function));
1526a060
BS
10790}
10791
e075ae69
RH
10792/* Return a MEM corresponding to a stack slot with mode MODE.
10793 Allocate a new slot if necessary.
10794
10795 The RTL for a function can have several slots available: N is
10796 which slot to use. */
10797
10798rtx
10799assign_386_stack_local (mode, n)
10800 enum machine_mode mode;
10801 int n;
10802{
10803 if (n < 0 || n >= MAX_386_STACK_LOCALS)
10804 abort ();
10805
10806 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
10807 ix86_stack_locals[(int) mode][n]
10808 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10809
10810 return ix86_stack_locals[(int) mode][n];
10811}
f996902d
RH
10812
10813/* Construct the SYMBOL_REF for the tls_get_addr function. */
10814
e2500fed 10815static GTY(()) rtx ix86_tls_symbol;
f996902d
RH
10816rtx
10817ix86_tls_get_addr ()
10818{
f996902d 10819
e2500fed 10820 if (!ix86_tls_symbol)
f996902d 10821 {
e2500fed 10822 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, (TARGET_GNU_TLS
f996902d
RH
10823 ? "___tls_get_addr"
10824 : "__tls_get_addr"));
f996902d
RH
10825 }
10826
e2500fed 10827 return ix86_tls_symbol;
f996902d 10828}
e075ae69
RH
10829\f
10830/* Calculate the length of the memory address in the instruction
10831 encoding. Does not include the one-byte modrm, opcode, or prefix. */
10832
10833static int
10834memory_address_length (addr)
10835 rtx addr;
10836{
10837 struct ix86_address parts;
10838 rtx base, index, disp;
10839 int len;
10840
10841 if (GET_CODE (addr) == PRE_DEC
0d7d98ee
JH
10842 || GET_CODE (addr) == POST_INC
10843 || GET_CODE (addr) == PRE_MODIFY
10844 || GET_CODE (addr) == POST_MODIFY)
e075ae69 10845 return 0;
3f803cd9 10846
e075ae69
RH
10847 if (! ix86_decompose_address (addr, &parts))
10848 abort ();
3f803cd9 10849
e075ae69
RH
10850 base = parts.base;
10851 index = parts.index;
10852 disp = parts.disp;
10853 len = 0;
3f803cd9 10854
e075ae69
RH
10855 /* Register Indirect. */
10856 if (base && !index && !disp)
10857 {
10858 /* Special cases: ebp and esp need the two-byte modrm form. */
10859 if (addr == stack_pointer_rtx
10860 || addr == arg_pointer_rtx
564d80f4
JH
10861 || addr == frame_pointer_rtx
10862 || addr == hard_frame_pointer_rtx)
e075ae69 10863 len = 1;
3f803cd9 10864 }
e9a25f70 10865
e075ae69
RH
10866 /* Direct Addressing. */
10867 else if (disp && !base && !index)
10868 len = 4;
10869
3f803cd9
SC
10870 else
10871 {
e075ae69
RH
10872 /* Find the length of the displacement constant. */
10873 if (disp)
10874 {
10875 if (GET_CODE (disp) == CONST_INT
10876 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
10877 len = 1;
10878 else
10879 len = 4;
10880 }
3f803cd9 10881
e075ae69
RH
10882 /* An index requires the two-byte modrm form. */
10883 if (index)
10884 len += 1;
3f803cd9
SC
10885 }
10886
e075ae69
RH
10887 return len;
10888}
79325812 10889
5bf0ebab
RH
10890/* Compute default value for "length_immediate" attribute. When SHORTFORM
10891 is set, expect that insn have 8bit immediate alternative. */
e075ae69 10892int
6ef67412 10893ix86_attr_length_immediate_default (insn, shortform)
e075ae69 10894 rtx insn;
6ef67412 10895 int shortform;
e075ae69 10896{
6ef67412
JH
10897 int len = 0;
10898 int i;
6c698a6d 10899 extract_insn_cached (insn);
6ef67412
JH
10900 for (i = recog_data.n_operands - 1; i >= 0; --i)
10901 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 10902 {
6ef67412 10903 if (len)
3071fab5 10904 abort ();
6ef67412
JH
10905 if (shortform
10906 && GET_CODE (recog_data.operand[i]) == CONST_INT
10907 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
10908 len = 1;
10909 else
10910 {
10911 switch (get_attr_mode (insn))
10912 {
10913 case MODE_QI:
10914 len+=1;
10915 break;
10916 case MODE_HI:
10917 len+=2;
10918 break;
10919 case MODE_SI:
10920 len+=4;
10921 break;
14f73b5a
JH
10922 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
10923 case MODE_DI:
10924 len+=4;
10925 break;
6ef67412 10926 default:
c725bd79 10927 fatal_insn ("unknown insn mode", insn);
6ef67412
JH
10928 }
10929 }
3071fab5 10930 }
6ef67412
JH
10931 return len;
10932}
10933/* Compute default value for "length_address" attribute. */
10934int
10935ix86_attr_length_address_default (insn)
10936 rtx insn;
10937{
10938 int i;
6c698a6d 10939 extract_insn_cached (insn);
1ccbefce
RH
10940 for (i = recog_data.n_operands - 1; i >= 0; --i)
10941 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 10942 {
6ef67412 10943 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
10944 break;
10945 }
6ef67412 10946 return 0;
3f803cd9 10947}
e075ae69
RH
10948\f
10949/* Return the maximum number of instructions a cpu can issue. */
b657fc39 10950
c237e94a 10951static int
e075ae69 10952ix86_issue_rate ()
b657fc39 10953{
e075ae69 10954 switch (ix86_cpu)
b657fc39 10955 {
e075ae69
RH
10956 case PROCESSOR_PENTIUM:
10957 case PROCESSOR_K6:
10958 return 2;
79325812 10959
e075ae69 10960 case PROCESSOR_PENTIUMPRO:
b4e89e2d
JH
10961 case PROCESSOR_PENTIUM4:
10962 case PROCESSOR_ATHLON:
e075ae69 10963 return 3;
b657fc39 10964
b657fc39 10965 default:
e075ae69 10966 return 1;
b657fc39 10967 }
b657fc39
L
10968}
10969
e075ae69
RH
10970/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
10971 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 10972
e075ae69
RH
10973static int
10974ix86_flags_dependant (insn, dep_insn, insn_type)
10975 rtx insn, dep_insn;
10976 enum attr_type insn_type;
10977{
10978 rtx set, set2;
b657fc39 10979
e075ae69
RH
10980 /* Simplify the test for uninteresting insns. */
10981 if (insn_type != TYPE_SETCC
10982 && insn_type != TYPE_ICMOV
10983 && insn_type != TYPE_FCMOV
10984 && insn_type != TYPE_IBR)
10985 return 0;
b657fc39 10986
e075ae69
RH
10987 if ((set = single_set (dep_insn)) != 0)
10988 {
10989 set = SET_DEST (set);
10990 set2 = NULL_RTX;
10991 }
10992 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
10993 && XVECLEN (PATTERN (dep_insn), 0) == 2
10994 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
10995 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
10996 {
10997 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10998 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10999 }
78a0d70c
ZW
11000 else
11001 return 0;
b657fc39 11002
78a0d70c
ZW
11003 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11004 return 0;
b657fc39 11005
f5143c46 11006 /* This test is true if the dependent insn reads the flags but
78a0d70c
ZW
11007 not any other potentially set register. */
11008 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11009 return 0;
11010
11011 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11012 return 0;
11013
11014 return 1;
e075ae69 11015}
b657fc39 11016
e075ae69
RH
11017/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11018 address with operands set by DEP_INSN. */
11019
11020static int
11021ix86_agi_dependant (insn, dep_insn, insn_type)
11022 rtx insn, dep_insn;
11023 enum attr_type insn_type;
11024{
11025 rtx addr;
11026
6ad48e84
JH
11027 if (insn_type == TYPE_LEA
11028 && TARGET_PENTIUM)
5fbdde42
RH
11029 {
11030 addr = PATTERN (insn);
11031 if (GET_CODE (addr) == SET)
11032 ;
11033 else if (GET_CODE (addr) == PARALLEL
11034 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11035 addr = XVECEXP (addr, 0, 0);
11036 else
11037 abort ();
11038 addr = SET_SRC (addr);
11039 }
e075ae69
RH
11040 else
11041 {
11042 int i;
6c698a6d 11043 extract_insn_cached (insn);
1ccbefce
RH
11044 for (i = recog_data.n_operands - 1; i >= 0; --i)
11045 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 11046 {
1ccbefce 11047 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
11048 goto found;
11049 }
11050 return 0;
11051 found:;
b657fc39
L
11052 }
11053
e075ae69 11054 return modified_in_p (addr, dep_insn);
b657fc39 11055}
a269a03c 11056
c237e94a 11057static int
e075ae69 11058ix86_adjust_cost (insn, link, dep_insn, cost)
a269a03c
JC
11059 rtx insn, link, dep_insn;
11060 int cost;
11061{
e075ae69 11062 enum attr_type insn_type, dep_insn_type;
6ad48e84 11063 enum attr_memory memory, dep_memory;
e075ae69 11064 rtx set, set2;
9b00189f 11065 int dep_insn_code_number;
a269a03c 11066
309ada50 11067 /* Anti and output depenancies have zero cost on all CPUs. */
e075ae69 11068 if (REG_NOTE_KIND (link) != 0)
309ada50 11069 return 0;
a269a03c 11070
9b00189f
JH
11071 dep_insn_code_number = recog_memoized (dep_insn);
11072
e075ae69 11073 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 11074 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 11075 return cost;
a269a03c 11076
1c71e60e
JH
11077 insn_type = get_attr_type (insn);
11078 dep_insn_type = get_attr_type (dep_insn);
9b00189f 11079
a269a03c
JC
11080 switch (ix86_cpu)
11081 {
11082 case PROCESSOR_PENTIUM:
e075ae69
RH
11083 /* Address Generation Interlock adds a cycle of latency. */
11084 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11085 cost += 1;
11086
11087 /* ??? Compares pair with jump/setcc. */
11088 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11089 cost = 0;
11090
11091 /* Floating point stores require value to be ready one cycle ealier. */
0f290768 11092 if (insn_type == TYPE_FMOV
e075ae69
RH
11093 && get_attr_memory (insn) == MEMORY_STORE
11094 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11095 cost += 1;
11096 break;
a269a03c 11097
e075ae69 11098 case PROCESSOR_PENTIUMPRO:
6ad48e84
JH
11099 memory = get_attr_memory (insn);
11100 dep_memory = get_attr_memory (dep_insn);
11101
0f290768 11102 /* Since we can't represent delayed latencies of load+operation,
e075ae69
RH
11103 increase the cost here for non-imov insns. */
11104 if (dep_insn_type != TYPE_IMOV
6ad48e84
JH
11105 && dep_insn_type != TYPE_FMOV
11106 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
e075ae69
RH
11107 cost += 1;
11108
11109 /* INT->FP conversion is expensive. */
11110 if (get_attr_fp_int_src (dep_insn))
11111 cost += 5;
11112
11113 /* There is one cycle extra latency between an FP op and a store. */
11114 if (insn_type == TYPE_FMOV
11115 && (set = single_set (dep_insn)) != NULL_RTX
11116 && (set2 = single_set (insn)) != NULL_RTX
11117 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11118 && GET_CODE (SET_DEST (set2)) == MEM)
11119 cost += 1;
6ad48e84
JH
11120
11121 /* Show ability of reorder buffer to hide latency of load by executing
11122 in parallel with previous instruction in case
11123 previous instruction is not needed to compute the address. */
11124 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11125 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11126 {
11127 /* Claim moves to take one cycle, as core can issue one load
11128 at time and the next load can start cycle later. */
11129 if (dep_insn_type == TYPE_IMOV
11130 || dep_insn_type == TYPE_FMOV)
11131 cost = 1;
11132 else if (cost > 1)
11133 cost--;
11134 }
e075ae69 11135 break;
a269a03c 11136
e075ae69 11137 case PROCESSOR_K6:
6ad48e84
JH
11138 memory = get_attr_memory (insn);
11139 dep_memory = get_attr_memory (dep_insn);
e075ae69
RH
11140 /* The esp dependency is resolved before the instruction is really
11141 finished. */
11142 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11143 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11144 return 1;
a269a03c 11145
0f290768 11146 /* Since we can't represent delayed latencies of load+operation,
e075ae69 11147 increase the cost here for non-imov insns. */
6ad48e84 11148 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
e075ae69
RH
11149 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11150
11151 /* INT->FP conversion is expensive. */
11152 if (get_attr_fp_int_src (dep_insn))
11153 cost += 5;
6ad48e84
JH
11154
11155 /* Show ability of reorder buffer to hide latency of load by executing
11156 in parallel with previous instruction in case
11157 previous instruction is not needed to compute the address. */
11158 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11159 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11160 {
11161 /* Claim moves to take one cycle, as core can issue one load
11162 at time and the next load can start cycle later. */
11163 if (dep_insn_type == TYPE_IMOV
11164 || dep_insn_type == TYPE_FMOV)
11165 cost = 1;
11166 else if (cost > 2)
11167 cost -= 2;
11168 else
11169 cost = 1;
11170 }
a14003ee 11171 break;
e075ae69 11172
309ada50 11173 case PROCESSOR_ATHLON:
6ad48e84
JH
11174 memory = get_attr_memory (insn);
11175 dep_memory = get_attr_memory (dep_insn);
11176
11177 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
0b5107cf
JH
11178 {
11179 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
11180 cost += 2;
11181 else
11182 cost += 3;
11183 }
6ad48e84
JH
11184 /* Show ability of reorder buffer to hide latency of load by executing
11185 in parallel with previous instruction in case
11186 previous instruction is not needed to compute the address. */
11187 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11188 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11189 {
11190 /* Claim moves to take one cycle, as core can issue one load
11191 at time and the next load can start cycle later. */
11192 if (dep_insn_type == TYPE_IMOV
11193 || dep_insn_type == TYPE_FMOV)
11194 cost = 0;
11195 else if (cost >= 3)
11196 cost -= 3;
11197 else
11198 cost = 0;
11199 }
309ada50 11200
a269a03c 11201 default:
a269a03c
JC
11202 break;
11203 }
11204
11205 return cost;
11206}
0a726ef1 11207
e075ae69
RH
11208static union
11209{
11210 struct ppro_sched_data
11211 {
11212 rtx decode[3];
11213 int issued_this_cycle;
11214 } ppro;
11215} ix86_sched_data;
0a726ef1 11216
e075ae69
RH
11217static enum attr_ppro_uops
11218ix86_safe_ppro_uops (insn)
11219 rtx insn;
11220{
11221 if (recog_memoized (insn) >= 0)
11222 return get_attr_ppro_uops (insn);
11223 else
11224 return PPRO_UOPS_MANY;
11225}
0a726ef1 11226
e075ae69
RH
11227static void
11228ix86_dump_ppro_packet (dump)
11229 FILE *dump;
0a726ef1 11230{
e075ae69 11231 if (ix86_sched_data.ppro.decode[0])
0a726ef1 11232 {
e075ae69
RH
11233 fprintf (dump, "PPRO packet: %d",
11234 INSN_UID (ix86_sched_data.ppro.decode[0]));
11235 if (ix86_sched_data.ppro.decode[1])
11236 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11237 if (ix86_sched_data.ppro.decode[2])
11238 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11239 fputc ('\n', dump);
11240 }
11241}
0a726ef1 11242
e075ae69 11243/* We're beginning a new block. Initialize data structures as necessary. */
0a726ef1 11244
c237e94a
ZW
11245static void
11246ix86_sched_init (dump, sched_verbose, veclen)
e075ae69
RH
11247 FILE *dump ATTRIBUTE_UNUSED;
11248 int sched_verbose ATTRIBUTE_UNUSED;
c237e94a 11249 int veclen ATTRIBUTE_UNUSED;
e075ae69
RH
11250{
11251 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11252}
11253
11254/* Shift INSN to SLOT, and shift everything else down. */
11255
11256static void
11257ix86_reorder_insn (insnp, slot)
11258 rtx *insnp, *slot;
11259{
11260 if (insnp != slot)
11261 {
11262 rtx insn = *insnp;
0f290768 11263 do
e075ae69
RH
11264 insnp[0] = insnp[1];
11265 while (++insnp != slot);
11266 *insnp = insn;
0a726ef1 11267 }
e075ae69
RH
11268}
11269
c6991660 11270static void
78a0d70c
ZW
11271ix86_sched_reorder_ppro (ready, e_ready)
11272 rtx *ready;
11273 rtx *e_ready;
11274{
11275 rtx decode[3];
11276 enum attr_ppro_uops cur_uops;
11277 int issued_this_cycle;
11278 rtx *insnp;
11279 int i;
e075ae69 11280
0f290768 11281 /* At this point .ppro.decode contains the state of the three
78a0d70c 11282 decoders from last "cycle". That is, those insns that were
0f290768 11283 actually independent. But here we're scheduling for the
78a0d70c
ZW
11284 decoder, and we may find things that are decodable in the
11285 same cycle. */
e075ae69 11286
0f290768 11287 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
78a0d70c 11288 issued_this_cycle = 0;
e075ae69 11289
78a0d70c
ZW
11290 insnp = e_ready;
11291 cur_uops = ix86_safe_ppro_uops (*insnp);
0a726ef1 11292
78a0d70c
ZW
11293 /* If the decoders are empty, and we've a complex insn at the
11294 head of the priority queue, let it issue without complaint. */
11295 if (decode[0] == NULL)
11296 {
11297 if (cur_uops == PPRO_UOPS_MANY)
11298 {
11299 decode[0] = *insnp;
11300 goto ppro_done;
11301 }
11302
11303 /* Otherwise, search for a 2-4 uop unsn to issue. */
11304 while (cur_uops != PPRO_UOPS_FEW)
11305 {
11306 if (insnp == ready)
11307 break;
11308 cur_uops = ix86_safe_ppro_uops (*--insnp);
11309 }
11310
11311 /* If so, move it to the head of the line. */
11312 if (cur_uops == PPRO_UOPS_FEW)
11313 ix86_reorder_insn (insnp, e_ready);
0a726ef1 11314
78a0d70c
ZW
11315 /* Issue the head of the queue. */
11316 issued_this_cycle = 1;
11317 decode[0] = *e_ready--;
11318 }
fb693d44 11319
78a0d70c
ZW
11320 /* Look for simple insns to fill in the other two slots. */
11321 for (i = 1; i < 3; ++i)
11322 if (decode[i] == NULL)
11323 {
a151daf0 11324 if (ready > e_ready)
78a0d70c 11325 goto ppro_done;
fb693d44 11326
e075ae69
RH
11327 insnp = e_ready;
11328 cur_uops = ix86_safe_ppro_uops (*insnp);
78a0d70c
ZW
11329 while (cur_uops != PPRO_UOPS_ONE)
11330 {
11331 if (insnp == ready)
11332 break;
11333 cur_uops = ix86_safe_ppro_uops (*--insnp);
11334 }
fb693d44 11335
78a0d70c
ZW
11336 /* Found one. Move it to the head of the queue and issue it. */
11337 if (cur_uops == PPRO_UOPS_ONE)
e075ae69 11338 {
78a0d70c
ZW
11339 ix86_reorder_insn (insnp, e_ready);
11340 decode[i] = *e_ready--;
11341 issued_this_cycle++;
11342 continue;
11343 }
fb693d44 11344
78a0d70c
ZW
11345 /* ??? Didn't find one. Ideally, here we would do a lazy split
11346 of 2-uop insns, issue one and queue the other. */
11347 }
fb693d44 11348
78a0d70c
ZW
11349 ppro_done:
11350 if (issued_this_cycle == 0)
11351 issued_this_cycle = 1;
11352 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11353}
fb693d44 11354
0f290768 11355/* We are about to being issuing insns for this clock cycle.
78a0d70c 11356 Override the default sort algorithm to better slot instructions. */
c237e94a
ZW
11357static int
11358ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
78a0d70c
ZW
11359 FILE *dump ATTRIBUTE_UNUSED;
11360 int sched_verbose ATTRIBUTE_UNUSED;
11361 rtx *ready;
c237e94a 11362 int *n_readyp;
78a0d70c
ZW
11363 int clock_var ATTRIBUTE_UNUSED;
11364{
c237e94a 11365 int n_ready = *n_readyp;
78a0d70c 11366 rtx *e_ready = ready + n_ready - 1;
fb693d44 11367
fce5a9f2 11368 /* Make sure to go ahead and initialize key items in
a151daf0
JL
11369 ix86_sched_data if we are not going to bother trying to
11370 reorder the ready queue. */
78a0d70c 11371 if (n_ready < 2)
a151daf0
JL
11372 {
11373 ix86_sched_data.ppro.issued_this_cycle = 1;
11374 goto out;
11375 }
e075ae69 11376
78a0d70c
ZW
11377 switch (ix86_cpu)
11378 {
11379 default:
11380 break;
e075ae69 11381
78a0d70c
ZW
11382 case PROCESSOR_PENTIUMPRO:
11383 ix86_sched_reorder_ppro (ready, e_ready);
e075ae69 11384 break;
fb693d44
RH
11385 }
11386
e075ae69
RH
11387out:
11388 return ix86_issue_rate ();
11389}
fb693d44 11390
e075ae69
RH
11391/* We are about to issue INSN. Return the number of insns left on the
11392 ready queue that can be issued this cycle. */
b222082e 11393
c237e94a 11394static int
e075ae69
RH
11395ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11396 FILE *dump;
11397 int sched_verbose;
11398 rtx insn;
11399 int can_issue_more;
11400{
11401 int i;
11402 switch (ix86_cpu)
fb693d44 11403 {
e075ae69
RH
11404 default:
11405 return can_issue_more - 1;
fb693d44 11406
e075ae69
RH
11407 case PROCESSOR_PENTIUMPRO:
11408 {
11409 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
fb693d44 11410
e075ae69
RH
11411 if (uops == PPRO_UOPS_MANY)
11412 {
11413 if (sched_verbose)
11414 ix86_dump_ppro_packet (dump);
11415 ix86_sched_data.ppro.decode[0] = insn;
11416 ix86_sched_data.ppro.decode[1] = NULL;
11417 ix86_sched_data.ppro.decode[2] = NULL;
11418 if (sched_verbose)
11419 ix86_dump_ppro_packet (dump);
11420 ix86_sched_data.ppro.decode[0] = NULL;
11421 }
11422 else if (uops == PPRO_UOPS_FEW)
11423 {
11424 if (sched_verbose)
11425 ix86_dump_ppro_packet (dump);
11426 ix86_sched_data.ppro.decode[0] = insn;
11427 ix86_sched_data.ppro.decode[1] = NULL;
11428 ix86_sched_data.ppro.decode[2] = NULL;
11429 }
11430 else
11431 {
11432 for (i = 0; i < 3; ++i)
11433 if (ix86_sched_data.ppro.decode[i] == NULL)
11434 {
11435 ix86_sched_data.ppro.decode[i] = insn;
11436 break;
11437 }
11438 if (i == 3)
11439 abort ();
11440 if (i == 2)
11441 {
11442 if (sched_verbose)
11443 ix86_dump_ppro_packet (dump);
11444 ix86_sched_data.ppro.decode[0] = NULL;
11445 ix86_sched_data.ppro.decode[1] = NULL;
11446 ix86_sched_data.ppro.decode[2] = NULL;
11447 }
11448 }
11449 }
11450 return --ix86_sched_data.ppro.issued_this_cycle;
11451 }
fb693d44 11452}
9b690711
RH
11453
11454static int
11455ia32_use_dfa_pipeline_interface ()
11456{
11457 if (ix86_cpu == PROCESSOR_PENTIUM)
11458 return 1;
11459 return 0;
11460}
11461
11462/* How many alternative schedules to try. This should be as wide as the
11463 scheduling freedom in the DFA, but no wider. Making this value too
11464 large results extra work for the scheduler. */
11465
11466static int
11467ia32_multipass_dfa_lookahead ()
11468{
11469 if (ix86_cpu == PROCESSOR_PENTIUM)
11470 return 2;
11471 else
11472 return 0;
11473}
11474
a7180f70 11475\f
0e4970d7
RK
11476/* Walk through INSNS and look for MEM references whose address is DSTREG or
11477 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
11478 appropriate. */
11479
11480void
11481ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11482 rtx insns;
11483 rtx dstref, srcref, dstreg, srcreg;
11484{
11485 rtx insn;
11486
11487 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11488 if (INSN_P (insn))
11489 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11490 dstreg, srcreg);
11491}
11492
11493/* Subroutine of above to actually do the updating by recursively walking
11494 the rtx. */
11495
11496static void
11497ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11498 rtx x;
11499 rtx dstref, srcref, dstreg, srcreg;
11500{
11501 enum rtx_code code = GET_CODE (x);
11502 const char *format_ptr = GET_RTX_FORMAT (code);
11503 int i, j;
11504
11505 if (code == MEM && XEXP (x, 0) == dstreg)
11506 MEM_COPY_ATTRIBUTES (x, dstref);
11507 else if (code == MEM && XEXP (x, 0) == srcreg)
11508 MEM_COPY_ATTRIBUTES (x, srcref);
11509
11510 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11511 {
11512 if (*format_ptr == 'e')
11513 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11514 dstreg, srcreg);
11515 else if (*format_ptr == 'E')
11516 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
397be6cf 11517 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
0e4970d7
RK
11518 dstreg, srcreg);
11519 }
11520}
11521\f
a7180f70
BS
11522/* Compute the alignment given to a constant that is being placed in memory.
11523 EXP is the constant and ALIGN is the alignment that the object would
11524 ordinarily have.
11525 The value of this function is used instead of that alignment to align
11526 the object. */
11527
11528int
11529ix86_constant_alignment (exp, align)
11530 tree exp;
11531 int align;
11532{
11533 if (TREE_CODE (exp) == REAL_CST)
11534 {
11535 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11536 return 64;
11537 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11538 return 128;
11539 }
11540 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11541 && align < 256)
11542 return 256;
11543
11544 return align;
11545}
11546
11547/* Compute the alignment for a static variable.
11548 TYPE is the data type, and ALIGN is the alignment that
11549 the object would ordinarily have. The value of this function is used
11550 instead of that alignment to align the object. */
11551
11552int
11553ix86_data_alignment (type, align)
11554 tree type;
11555 int align;
11556{
11557 if (AGGREGATE_TYPE_P (type)
11558 && TYPE_SIZE (type)
11559 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11560 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11561 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11562 return 256;
11563
0d7d98ee
JH
11564 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11565 to 16byte boundary. */
11566 if (TARGET_64BIT)
11567 {
11568 if (AGGREGATE_TYPE_P (type)
11569 && TYPE_SIZE (type)
11570 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11571 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11572 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11573 return 128;
11574 }
11575
a7180f70
BS
11576 if (TREE_CODE (type) == ARRAY_TYPE)
11577 {
11578 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11579 return 64;
11580 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11581 return 128;
11582 }
11583 else if (TREE_CODE (type) == COMPLEX_TYPE)
11584 {
0f290768 11585
a7180f70
BS
11586 if (TYPE_MODE (type) == DCmode && align < 64)
11587 return 64;
11588 if (TYPE_MODE (type) == XCmode && align < 128)
11589 return 128;
11590 }
11591 else if ((TREE_CODE (type) == RECORD_TYPE
11592 || TREE_CODE (type) == UNION_TYPE
11593 || TREE_CODE (type) == QUAL_UNION_TYPE)
11594 && TYPE_FIELDS (type))
11595 {
11596 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11597 return 64;
11598 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11599 return 128;
11600 }
11601 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11602 || TREE_CODE (type) == INTEGER_TYPE)
11603 {
11604 if (TYPE_MODE (type) == DFmode && align < 64)
11605 return 64;
11606 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11607 return 128;
11608 }
11609
11610 return align;
11611}
11612
11613/* Compute the alignment for a local variable.
11614 TYPE is the data type, and ALIGN is the alignment that
11615 the object would ordinarily have. The value of this macro is used
11616 instead of that alignment to align the object. */
11617
11618int
11619ix86_local_alignment (type, align)
11620 tree type;
11621 int align;
11622{
0d7d98ee
JH
11623 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11624 to 16byte boundary. */
11625 if (TARGET_64BIT)
11626 {
11627 if (AGGREGATE_TYPE_P (type)
11628 && TYPE_SIZE (type)
11629 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11630 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11631 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11632 return 128;
11633 }
a7180f70
BS
11634 if (TREE_CODE (type) == ARRAY_TYPE)
11635 {
11636 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11637 return 64;
11638 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11639 return 128;
11640 }
11641 else if (TREE_CODE (type) == COMPLEX_TYPE)
11642 {
11643 if (TYPE_MODE (type) == DCmode && align < 64)
11644 return 64;
11645 if (TYPE_MODE (type) == XCmode && align < 128)
11646 return 128;
11647 }
11648 else if ((TREE_CODE (type) == RECORD_TYPE
11649 || TREE_CODE (type) == UNION_TYPE
11650 || TREE_CODE (type) == QUAL_UNION_TYPE)
11651 && TYPE_FIELDS (type))
11652 {
11653 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11654 return 64;
11655 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11656 return 128;
11657 }
11658 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11659 || TREE_CODE (type) == INTEGER_TYPE)
11660 {
0f290768 11661
a7180f70
BS
11662 if (TYPE_MODE (type) == DFmode && align < 64)
11663 return 64;
11664 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11665 return 128;
11666 }
11667 return align;
11668}
0ed08620
JH
11669\f
11670/* Emit RTL insns to initialize the variable parts of a trampoline.
11671 FNADDR is an RTX for the address of the function's pure code.
11672 CXT is an RTX for the static chain value for the function. */
11673void
11674x86_initialize_trampoline (tramp, fnaddr, cxt)
11675 rtx tramp, fnaddr, cxt;
11676{
11677 if (!TARGET_64BIT)
11678 {
11679 /* Compute offset from the end of the jmp to the target function. */
11680 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11681 plus_constant (tramp, 10),
11682 NULL_RTX, 1, OPTAB_DIRECT);
11683 emit_move_insn (gen_rtx_MEM (QImode, tramp),
d8bf17f9 11684 gen_int_mode (0xb9, QImode));
0ed08620
JH
11685 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11686 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
d8bf17f9 11687 gen_int_mode (0xe9, QImode));
0ed08620
JH
11688 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11689 }
11690 else
11691 {
11692 int offset = 0;
11693 /* Try to load address using shorter movl instead of movabs.
11694 We may want to support movq for kernel mode, but kernel does not use
11695 trampolines at the moment. */
11696 if (x86_64_zero_extended_value (fnaddr))
11697 {
11698 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11699 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 11700 gen_int_mode (0xbb41, HImode));
0ed08620
JH
11701 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11702 gen_lowpart (SImode, fnaddr));
11703 offset += 6;
11704 }
11705 else
11706 {
11707 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 11708 gen_int_mode (0xbb49, HImode));
0ed08620
JH
11709 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11710 fnaddr);
11711 offset += 10;
11712 }
11713 /* Load static chain using movabs to r10. */
11714 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 11715 gen_int_mode (0xba49, HImode));
0ed08620
JH
11716 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11717 cxt);
11718 offset += 10;
11719 /* Jump to the r11 */
11720 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 11721 gen_int_mode (0xff49, HImode));
0ed08620 11722 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
d8bf17f9 11723 gen_int_mode (0xe3, QImode));
0ed08620
JH
11724 offset += 3;
11725 if (offset > TRAMPOLINE_SIZE)
b531087a 11726 abort ();
0ed08620 11727 }
5791cc29
JT
11728
11729#ifdef TRANSFER_FROM_TRAMPOLINE
11730 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
11731 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
11732#endif
0ed08620 11733}
eeb06b1b 11734\f
6a2dd09a
RS
/* Register the machine-specific builtin NAME with type TYPE and
   function code CODE, but only when at least one bit of MASK is set in
   target_flags.  */
#define def_builtin(MASK, NAME, TYPE, CODE)				\
do {									\
  if (target_flags & (MASK))						\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,		\
		      NULL, NULL_TREE);					\
} while (0)
bd793c65 11741
bd793c65
BS
11742struct builtin_description
11743{
8b60264b
KG
11744 const unsigned int mask;
11745 const enum insn_code icode;
11746 const char *const name;
11747 const enum ix86_builtins code;
11748 const enum rtx_code comparison;
11749 const unsigned int flag;
bd793c65
BS
11750};
11751
fbe5eb6d
BS
/* Mask for builtins that are available with either -msse or -msse2.  */
#define MASK_SSE1 (MASK_SSE | MASK_SSE2)
11754
8b60264b 11755static const struct builtin_description bdesc_comi[] =
bd793c65 11756{
1194ca05
JH
11757 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
11758 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
11759 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
11760 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
11761 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
11762 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
11763 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
11764 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
11765 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
11766 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
11767 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
11768 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
11769 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
11770 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
11771 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
11772 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
11773 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
11774 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
11775 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
11776 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
11777 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
11778 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
11779 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
11780 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
bd793c65
BS
11781};
11782
8b60264b 11783static const struct builtin_description bdesc_2arg[] =
bd793c65
BS
11784{
11785 /* SSE */
fbe5eb6d
BS
11786 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11787 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11788 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11789 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11790 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11791 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11792 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11793 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11794
11795 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11796 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11797 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11798 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11799 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11800 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11801 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11802 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11803 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11804 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11805 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11806 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11807 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11808 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11809 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
fbe5eb6d
BS
11810 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11811 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11812 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11813 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
fbe5eb6d
BS
11814 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11815
11816 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11817 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11818 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11819 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11820
1877be45
JH
11821 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
11822 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
11823 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
11824 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
11825
fbe5eb6d
BS
11826 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11827 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11828 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11829 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11830 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
bd793c65
BS
11831
11832 /* MMX */
eeb06b1b
BS
11833 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11834 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11835 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11836 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11837 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11838 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11839
11840 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11841 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11842 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11843 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11844 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11845 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11846 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11847 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11848
11849 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11850 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
fbe5eb6d 11851 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
eeb06b1b
BS
11852
11853 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11854 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11855 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11856 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11857
fbe5eb6d
BS
11858 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11859 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
eeb06b1b
BS
11860
11861 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11862 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11863 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11864 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11865 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11866 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11867
fbe5eb6d
BS
11868 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11869 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11870 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11871 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
eeb06b1b
BS
11872
11873 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11874 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11875 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11876 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11877 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11878 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
bd793c65
BS
11879
11880 /* Special. */
eeb06b1b
BS
11881 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11882 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11883 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11884
fbe5eb6d
BS
11885 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11886 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
eeb06b1b
BS
11887
11888 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11889 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11890 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11891 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11892 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11893 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
11894
11895 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
11896 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
11897 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
11898 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
11899 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
11900 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
11901
11902 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
11903 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
11904 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
11905 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
11906
fbe5eb6d
BS
11907 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
11908 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
11909
11910 /* SSE2 */
11911 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
11912 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
11913 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
11914 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
11915 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
11916 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
11917 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
11918 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
11919
11920 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
11921 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
11922 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
11923 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
11924 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
11925 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
11926 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
11927 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
11928 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
11929 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
11930 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
11931 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
11932 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
11933 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
11934 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
fbe5eb6d
BS
11935 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
11936 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
11937 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
11938 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
fbe5eb6d
BS
11939 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
11940
11941 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
11942 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
11943 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
11944 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
11945
1877be45
JH
11946 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
11947 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
11948 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
11949 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
fbe5eb6d
BS
11950
11951 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
11952 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
11953 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
11954
11955 /* SSE2 MMX */
11956 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
11957 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
11958 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
11959 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
11960 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
11961 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
11962 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
11963 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
11964
11965 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
11966 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
11967 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
11968 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
11969 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
11970 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
11971 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
11972 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
11973
11974 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
11975 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
11976 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
11977 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
11978
916b60b7
BS
11979 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
11980 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
11981 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
11982 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
fbe5eb6d
BS
11983
11984 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
11985 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
11986
11987 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
11988 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
11989 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
11990 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
11991 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
11992 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
11993
11994 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
11995 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
11996 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
11997 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
11998
11999 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12000 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12001 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12002 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12003 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12004 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12005
916b60b7
BS
12006 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12007 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12008 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12009
12010 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12011 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12012
12013 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12014 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12015 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12016 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12017 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12018 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12019
12020 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12021 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12022 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12023 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12024 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12025 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12026
12027 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12028 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12029 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12030 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12031
12032 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12033
fbe5eb6d
BS
12034 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12035 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12036 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
bd793c65
BS
12037};
12038
8b60264b 12039static const struct builtin_description bdesc_1arg[] =
bd793c65 12040{
fbe5eb6d
BS
12041 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12042 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12043
12044 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12045 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12046 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12047
12048 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12049 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12050 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12051 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12052
12053 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12054 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12055 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12056
12057 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12058
12059 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12060 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
bd793c65 12061
fbe5eb6d
BS
12062 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12063 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12064 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12065 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12066 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
bd793c65 12067
fbe5eb6d 12068 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
bd793c65 12069
fbe5eb6d
BS
12070 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12071 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12072
12073 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12074 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12075 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
bd793c65
BS
12076};
12077
f6155fda
SS
12078void
12079ix86_init_builtins ()
12080{
12081 if (TARGET_MMX)
12082 ix86_init_mmx_sse_builtins ();
12083}
12084
12085/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
bd793c65
BS
12086 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12087 builtins. */
e37af218 12088static void
f6155fda 12089ix86_init_mmx_sse_builtins ()
bd793c65 12090{
8b60264b 12091 const struct builtin_description * d;
77ebd435 12092 size_t i;
bd793c65
BS
12093
12094 tree pchar_type_node = build_pointer_type (char_type_node);
12095 tree pfloat_type_node = build_pointer_type (float_type_node);
12096 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
916b60b7 12097 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
bd793c65
BS
12098 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12099
12100 /* Comparisons. */
12101 tree int_ftype_v4sf_v4sf
b4de2f7d
AH
12102 = build_function_type_list (integer_type_node,
12103 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12104 tree v4si_ftype_v4sf_v4sf
b4de2f7d
AH
12105 = build_function_type_list (V4SI_type_node,
12106 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12107 /* MMX/SSE/integer conversions. */
bd793c65 12108 tree int_ftype_v4sf
b4de2f7d
AH
12109 = build_function_type_list (integer_type_node,
12110 V4SF_type_node, NULL_TREE);
bd793c65 12111 tree int_ftype_v8qi
b4de2f7d 12112 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
bd793c65 12113 tree v4sf_ftype_v4sf_int
b4de2f7d
AH
12114 = build_function_type_list (V4SF_type_node,
12115 V4SF_type_node, integer_type_node, NULL_TREE);
bd793c65 12116 tree v4sf_ftype_v4sf_v2si
b4de2f7d
AH
12117 = build_function_type_list (V4SF_type_node,
12118 V4SF_type_node, V2SI_type_node, NULL_TREE);
bd793c65 12119 tree int_ftype_v4hi_int
b4de2f7d
AH
12120 = build_function_type_list (integer_type_node,
12121 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 12122 tree v4hi_ftype_v4hi_int_int
e7a60f56 12123 = build_function_type_list (V4HI_type_node, V4HI_type_node,
b4de2f7d
AH
12124 integer_type_node, integer_type_node,
12125 NULL_TREE);
bd793c65
BS
12126 /* Miscellaneous. */
12127 tree v8qi_ftype_v4hi_v4hi
b4de2f7d
AH
12128 = build_function_type_list (V8QI_type_node,
12129 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 12130 tree v4hi_ftype_v2si_v2si
b4de2f7d
AH
12131 = build_function_type_list (V4HI_type_node,
12132 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 12133 tree v4sf_ftype_v4sf_v4sf_int
b4de2f7d
AH
12134 = build_function_type_list (V4SF_type_node,
12135 V4SF_type_node, V4SF_type_node,
12136 integer_type_node, NULL_TREE);
bd793c65 12137 tree v2si_ftype_v4hi_v4hi
b4de2f7d
AH
12138 = build_function_type_list (V2SI_type_node,
12139 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 12140 tree v4hi_ftype_v4hi_int
b4de2f7d 12141 = build_function_type_list (V4HI_type_node,
e7a60f56 12142 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 12143 tree v4hi_ftype_v4hi_di
b4de2f7d
AH
12144 = build_function_type_list (V4HI_type_node,
12145 V4HI_type_node, long_long_unsigned_type_node,
12146 NULL_TREE);
bd793c65 12147 tree v2si_ftype_v2si_di
b4de2f7d
AH
12148 = build_function_type_list (V2SI_type_node,
12149 V2SI_type_node, long_long_unsigned_type_node,
12150 NULL_TREE);
bd793c65 12151 tree void_ftype_void
b4de2f7d 12152 = build_function_type (void_type_node, void_list_node);
bd793c65 12153 tree void_ftype_unsigned
b4de2f7d 12154 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
bd793c65 12155 tree unsigned_ftype_void
b4de2f7d 12156 = build_function_type (unsigned_type_node, void_list_node);
bd793c65 12157 tree di_ftype_void
b4de2f7d 12158 = build_function_type (long_long_unsigned_type_node, void_list_node);
e37af218 12159 tree v4sf_ftype_void
b4de2f7d 12160 = build_function_type (V4SF_type_node, void_list_node);
bd793c65 12161 tree v2si_ftype_v4sf
b4de2f7d 12162 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12163 /* Loads/stores. */
bd793c65 12164 tree void_ftype_v8qi_v8qi_pchar
b4de2f7d
AH
12165 = build_function_type_list (void_type_node,
12166 V8QI_type_node, V8QI_type_node,
12167 pchar_type_node, NULL_TREE);
bd793c65 12168 tree v4sf_ftype_pfloat
b4de2f7d 12169 = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
bd793c65
BS
12170 /* @@@ the type is bogus */
12171 tree v4sf_ftype_v4sf_pv2si
b4de2f7d
AH
12172 = build_function_type_list (V4SF_type_node,
12173 V4SF_type_node, pv2di_type_node, NULL_TREE);
1255c85c 12174 tree void_ftype_pv2si_v4sf
b4de2f7d
AH
12175 = build_function_type_list (void_type_node,
12176 pv2di_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12177 tree void_ftype_pfloat_v4sf
b4de2f7d
AH
12178 = build_function_type_list (void_type_node,
12179 pfloat_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12180 tree void_ftype_pdi_di
b4de2f7d
AH
12181 = build_function_type_list (void_type_node,
12182 pdi_type_node, long_long_unsigned_type_node,
12183 NULL_TREE);
916b60b7 12184 tree void_ftype_pv2di_v2di
b4de2f7d
AH
12185 = build_function_type_list (void_type_node,
12186 pv2di_type_node, V2DI_type_node, NULL_TREE);
bd793c65
BS
12187 /* Normal vector unops. */
12188 tree v4sf_ftype_v4sf
b4de2f7d 12189 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
0f290768 12190
bd793c65
BS
12191 /* Normal vector binops. */
12192 tree v4sf_ftype_v4sf_v4sf
b4de2f7d
AH
12193 = build_function_type_list (V4SF_type_node,
12194 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12195 tree v8qi_ftype_v8qi_v8qi
b4de2f7d
AH
12196 = build_function_type_list (V8QI_type_node,
12197 V8QI_type_node, V8QI_type_node, NULL_TREE);
bd793c65 12198 tree v4hi_ftype_v4hi_v4hi
b4de2f7d
AH
12199 = build_function_type_list (V4HI_type_node,
12200 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 12201 tree v2si_ftype_v2si_v2si
b4de2f7d
AH
12202 = build_function_type_list (V2SI_type_node,
12203 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 12204 tree di_ftype_di_di
b4de2f7d
AH
12205 = build_function_type_list (long_long_unsigned_type_node,
12206 long_long_unsigned_type_node,
12207 long_long_unsigned_type_node, NULL_TREE);
bd793c65 12208
47f339cf 12209 tree v2si_ftype_v2sf
ae3aa00d 12210 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
47f339cf 12211 tree v2sf_ftype_v2si
b4de2f7d 12212 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
47f339cf 12213 tree v2si_ftype_v2si
b4de2f7d 12214 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
47f339cf 12215 tree v2sf_ftype_v2sf
b4de2f7d 12216 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 12217 tree v2sf_ftype_v2sf_v2sf
b4de2f7d
AH
12218 = build_function_type_list (V2SF_type_node,
12219 V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 12220 tree v2si_ftype_v2sf_v2sf
b4de2f7d
AH
12221 = build_function_type_list (V2SI_type_node,
12222 V2SF_type_node, V2SF_type_node, NULL_TREE);
fbe5eb6d
BS
12223 tree pint_type_node = build_pointer_type (integer_type_node);
12224 tree pdouble_type_node = build_pointer_type (double_type_node);
12225 tree int_ftype_v2df_v2df
b4de2f7d
AH
12226 = build_function_type_list (integer_type_node,
12227 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d
BS
12228
12229 tree ti_ftype_void
b4de2f7d 12230 = build_function_type (intTI_type_node, void_list_node);
fbe5eb6d 12231 tree ti_ftype_ti_ti
b4de2f7d
AH
12232 = build_function_type_list (intTI_type_node,
12233 intTI_type_node, intTI_type_node, NULL_TREE);
fbe5eb6d 12234 tree void_ftype_pvoid
b4de2f7d 12235 = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
fbe5eb6d 12236 tree v2di_ftype_di
b4de2f7d
AH
12237 = build_function_type_list (V2DI_type_node,
12238 long_long_unsigned_type_node, NULL_TREE);
fbe5eb6d 12239 tree v4sf_ftype_v4si
b4de2f7d 12240 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 12241 tree v4si_ftype_v4sf
b4de2f7d 12242 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 12243 tree v2df_ftype_v4si
b4de2f7d 12244 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 12245 tree v4si_ftype_v2df
b4de2f7d 12246 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12247 tree v2si_ftype_v2df
b4de2f7d 12248 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12249 tree v4sf_ftype_v2df
b4de2f7d 12250 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12251 tree v2df_ftype_v2si
b4de2f7d 12252 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
fbe5eb6d 12253 tree v2df_ftype_v4sf
b4de2f7d 12254 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 12255 tree int_ftype_v2df
b4de2f7d 12256 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12257 tree v2df_ftype_v2df_int
b4de2f7d
AH
12258 = build_function_type_list (V2DF_type_node,
12259 V2DF_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12260 tree v4sf_ftype_v4sf_v2df
b4de2f7d
AH
12261 = build_function_type_list (V4SF_type_node,
12262 V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12263 tree v2df_ftype_v2df_v4sf
b4de2f7d
AH
12264 = build_function_type_list (V2DF_type_node,
12265 V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 12266 tree v2df_ftype_v2df_v2df_int
b4de2f7d
AH
12267 = build_function_type_list (V2DF_type_node,
12268 V2DF_type_node, V2DF_type_node,
12269 integer_type_node,
12270 NULL_TREE);
fbe5eb6d 12271 tree v2df_ftype_v2df_pv2si
b4de2f7d
AH
12272 = build_function_type_list (V2DF_type_node,
12273 V2DF_type_node, pv2si_type_node, NULL_TREE);
fbe5eb6d 12274 tree void_ftype_pv2si_v2df
b4de2f7d
AH
12275 = build_function_type_list (void_type_node,
12276 pv2si_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12277 tree void_ftype_pdouble_v2df
b4de2f7d
AH
12278 = build_function_type_list (void_type_node,
12279 pdouble_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12280 tree void_ftype_pint_int
b4de2f7d
AH
12281 = build_function_type_list (void_type_node,
12282 pint_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12283 tree void_ftype_v16qi_v16qi_pchar
b4de2f7d
AH
12284 = build_function_type_list (void_type_node,
12285 V16QI_type_node, V16QI_type_node,
12286 pchar_type_node, NULL_TREE);
fbe5eb6d 12287 tree v2df_ftype_pdouble
b4de2f7d 12288 = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
fbe5eb6d 12289 tree v2df_ftype_v2df_v2df
b4de2f7d
AH
12290 = build_function_type_list (V2DF_type_node,
12291 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12292 tree v16qi_ftype_v16qi_v16qi
b4de2f7d
AH
12293 = build_function_type_list (V16QI_type_node,
12294 V16QI_type_node, V16QI_type_node, NULL_TREE);
fbe5eb6d 12295 tree v8hi_ftype_v8hi_v8hi
b4de2f7d
AH
12296 = build_function_type_list (V8HI_type_node,
12297 V8HI_type_node, V8HI_type_node, NULL_TREE);
fbe5eb6d 12298 tree v4si_ftype_v4si_v4si
b4de2f7d
AH
12299 = build_function_type_list (V4SI_type_node,
12300 V4SI_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 12301 tree v2di_ftype_v2di_v2di
b4de2f7d
AH
12302 = build_function_type_list (V2DI_type_node,
12303 V2DI_type_node, V2DI_type_node, NULL_TREE);
fbe5eb6d 12304 tree v2di_ftype_v2df_v2df
b4de2f7d
AH
12305 = build_function_type_list (V2DI_type_node,
12306 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12307 tree v2df_ftype_v2df
b4de2f7d 12308 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12309 tree v2df_ftype_double
b4de2f7d 12310 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
fbe5eb6d 12311 tree v2df_ftype_double_double
b4de2f7d
AH
12312 = build_function_type_list (V2DF_type_node,
12313 double_type_node, double_type_node, NULL_TREE);
fbe5eb6d 12314 tree int_ftype_v8hi_int
b4de2f7d
AH
12315 = build_function_type_list (integer_type_node,
12316 V8HI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12317 tree v8hi_ftype_v8hi_int_int
b4de2f7d
AH
12318 = build_function_type_list (V8HI_type_node,
12319 V8HI_type_node, integer_type_node,
12320 integer_type_node, NULL_TREE);
916b60b7 12321 tree v2di_ftype_v2di_int
b4de2f7d
AH
12322 = build_function_type_list (V2DI_type_node,
12323 V2DI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12324 tree v4si_ftype_v4si_int
b4de2f7d
AH
12325 = build_function_type_list (V4SI_type_node,
12326 V4SI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12327 tree v8hi_ftype_v8hi_int
b4de2f7d
AH
12328 = build_function_type_list (V8HI_type_node,
12329 V8HI_type_node, integer_type_node, NULL_TREE);
916b60b7 12330 tree v8hi_ftype_v8hi_v2di
b4de2f7d
AH
12331 = build_function_type_list (V8HI_type_node,
12332 V8HI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 12333 tree v4si_ftype_v4si_v2di
b4de2f7d
AH
12334 = build_function_type_list (V4SI_type_node,
12335 V4SI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 12336 tree v4si_ftype_v8hi_v8hi
b4de2f7d
AH
12337 = build_function_type_list (V4SI_type_node,
12338 V8HI_type_node, V8HI_type_node, NULL_TREE);
916b60b7 12339 tree di_ftype_v8qi_v8qi
b4de2f7d
AH
12340 = build_function_type_list (long_long_unsigned_type_node,
12341 V8QI_type_node, V8QI_type_node, NULL_TREE);
916b60b7 12342 tree v2di_ftype_v16qi_v16qi
b4de2f7d
AH
12343 = build_function_type_list (V2DI_type_node,
12344 V16QI_type_node, V16QI_type_node, NULL_TREE);
916b60b7 12345 tree int_ftype_v16qi
b4de2f7d 12346 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
47f339cf 12347
bd793c65
BS
12348 /* Add all builtins that are more or less simple operations on two
12349 operands. */
ca7558fc 12350 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
bd793c65
BS
12351 {
12352 /* Use one of the operands; the target can have a different mode for
12353 mask-generating compares. */
12354 enum machine_mode mode;
12355 tree type;
12356
12357 if (d->name == 0)
12358 continue;
12359 mode = insn_data[d->icode].operand[1].mode;
12360
bd793c65
BS
12361 switch (mode)
12362 {
fbe5eb6d
BS
12363 case V16QImode:
12364 type = v16qi_ftype_v16qi_v16qi;
12365 break;
12366 case V8HImode:
12367 type = v8hi_ftype_v8hi_v8hi;
12368 break;
12369 case V4SImode:
12370 type = v4si_ftype_v4si_v4si;
12371 break;
12372 case V2DImode:
12373 type = v2di_ftype_v2di_v2di;
12374 break;
12375 case V2DFmode:
12376 type = v2df_ftype_v2df_v2df;
12377 break;
12378 case TImode:
12379 type = ti_ftype_ti_ti;
12380 break;
bd793c65
BS
12381 case V4SFmode:
12382 type = v4sf_ftype_v4sf_v4sf;
12383 break;
12384 case V8QImode:
12385 type = v8qi_ftype_v8qi_v8qi;
12386 break;
12387 case V4HImode:
12388 type = v4hi_ftype_v4hi_v4hi;
12389 break;
12390 case V2SImode:
12391 type = v2si_ftype_v2si_v2si;
12392 break;
bd793c65
BS
12393 case DImode:
12394 type = di_ftype_di_di;
12395 break;
12396
12397 default:
12398 abort ();
12399 }
0f290768 12400
bd793c65
BS
12401 /* Override for comparisons. */
12402 if (d->icode == CODE_FOR_maskcmpv4sf3
12403 || d->icode == CODE_FOR_maskncmpv4sf3
12404 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12405 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12406 type = v4si_ftype_v4sf_v4sf;
12407
fbe5eb6d
BS
12408 if (d->icode == CODE_FOR_maskcmpv2df3
12409 || d->icode == CODE_FOR_maskncmpv2df3
12410 || d->icode == CODE_FOR_vmmaskcmpv2df3
12411 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12412 type = v2di_ftype_v2df_v2df;
12413
eeb06b1b 12414 def_builtin (d->mask, d->name, type, d->code);
bd793c65
BS
12415 }
12416
12417 /* Add the remaining MMX insns with somewhat more complicated types. */
eeb06b1b
BS
12418 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12419 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12420 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12421 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12422 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12423 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12424 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12425
12426 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12427 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12428 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12429
12430 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12431 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12432
12433 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12434 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
0f290768 12435
bd793c65 12436 /* comi/ucomi insns. */
ca7558fc 12437 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
fbe5eb6d
BS
12438 if (d->mask == MASK_SSE2)
12439 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12440 else
12441 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
bd793c65 12442
1255c85c
BS
12443 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12444 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12445 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
bd793c65 12446
fbe5eb6d
BS
12447 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12448 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12449 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12450 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12451 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12452 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
bd793c65 12453
fbe5eb6d
BS
12454 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12455 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
bd793c65 12456
fbe5eb6d 12457 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
bd793c65 12458
fbe5eb6d
BS
12459 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12460 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12461 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12462 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12463 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12464 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
bd793c65 12465
fbe5eb6d
BS
12466 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12467 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12468 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12469 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
bd793c65 12470
fbe5eb6d
BS
12471 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12472 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12473 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12474 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
bd793c65 12475
fbe5eb6d 12476 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
bd793c65 12477
916b60b7 12478 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
bd793c65 12479
fbe5eb6d
BS
12480 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12481 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12482 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12483 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12484 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12485 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
bd793c65 12486
fbe5eb6d 12487 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
bd793c65 12488
47f339cf
BS
12489 /* Original 3DNow! */
12490 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12491 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12492 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12493 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12494 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12495 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12496 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12497 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12498 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12499 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12500 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12501 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12502 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12503 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12504 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12505 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12506 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12507 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12508 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12509 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
47f339cf
BS
12510
12511 /* 3DNow! extension as used in the Athlon CPU. */
12512 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12513 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12514 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12515 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12516 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12517 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12518
fbe5eb6d
BS
12519 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12520
12521 /* SSE2 */
12522 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12523 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12524
12525 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12526 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12527
12528 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
12529 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
12530 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
12531 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12532 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12533 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12534
12535 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12536 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12537 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12538 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12539
12540 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
916b60b7 12541 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
fbe5eb6d
BS
12542 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12543 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
916b60b7 12544 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
fbe5eb6d
BS
12545
12546 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12547 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12548 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
916b60b7 12549 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
fbe5eb6d
BS
12550
12551 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12552 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12553
12554 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12555
12556 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
916b60b7 12557 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
fbe5eb6d
BS
12558
12559 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12560 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12561 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12562 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12563 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12564
12565 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12566
12567 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12568 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12569
12570 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12571 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12572 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12573
12574 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12575 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12576 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12577
12578 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12579 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12580 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12581 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
12582 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
12583 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12584 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12585
12586 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
12587 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12588 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
916b60b7
BS
12589
12590 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12591 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12592 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12593
12594 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12595 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12596 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12597
12598 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12599 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12600
ab3146fd 12601 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
916b60b7
BS
12602 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12603 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12604 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12605
ab3146fd 12606 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
916b60b7
BS
12607 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12608 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12609 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12610
12611 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12612 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12613
12614 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
bd793c65
BS
12615}
12616
12617/* Errors in the source file can cause expand_expr to return const0_rtx
12618 where we expect a vector. To avoid crashing, use one of the vector
12619 clear instructions. */
12620static rtx
12621safe_vector_operand (x, mode)
12622 rtx x;
12623 enum machine_mode mode;
12624{
12625 if (x != const0_rtx)
12626 return x;
12627 x = gen_reg_rtx (mode);
12628
47f339cf 12629 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
bd793c65
BS
12630 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12631 : gen_rtx_SUBREG (DImode, x, 0)));
12632 else
e37af218
RH
12633 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12634 : gen_rtx_SUBREG (V4SFmode, x, 0)));
bd793c65
BS
12635 return x;
12636}
12637
12638/* Subroutine of ix86_expand_builtin to take care of binop insns. */
12639
12640static rtx
12641ix86_expand_binop_builtin (icode, arglist, target)
12642 enum insn_code icode;
12643 tree arglist;
12644 rtx target;
12645{
12646 rtx pat;
12647 tree arg0 = TREE_VALUE (arglist);
12648 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12649 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12650 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12651 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12652 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12653 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12654
12655 if (VECTOR_MODE_P (mode0))
12656 op0 = safe_vector_operand (op0, mode0);
12657 if (VECTOR_MODE_P (mode1))
12658 op1 = safe_vector_operand (op1, mode1);
12659
12660 if (! target
12661 || GET_MODE (target) != tmode
12662 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12663 target = gen_reg_rtx (tmode);
12664
12665 /* In case the insn wants input operands in modes different from
12666 the result, abort. */
12667 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
12668 abort ();
12669
12670 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12671 op0 = copy_to_mode_reg (mode0, op0);
12672 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12673 op1 = copy_to_mode_reg (mode1, op1);
12674
59bef189
RH
12675 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12676 yet one of the two must not be a memory. This is normally enforced
12677 by expanders, but we didn't bother to create one here. */
12678 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12679 op0 = copy_to_mode_reg (mode0, op0);
12680
bd793c65
BS
12681 pat = GEN_FCN (icode) (target, op0, op1);
12682 if (! pat)
12683 return 0;
12684 emit_insn (pat);
12685 return target;
12686}
12687
12688/* Subroutine of ix86_expand_builtin to take care of stores. */
12689
12690static rtx
e37af218 12691ix86_expand_store_builtin (icode, arglist)
bd793c65
BS
12692 enum insn_code icode;
12693 tree arglist;
bd793c65
BS
12694{
12695 rtx pat;
12696 tree arg0 = TREE_VALUE (arglist);
12697 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12698 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12699 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12700 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12701 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12702
12703 if (VECTOR_MODE_P (mode1))
12704 op1 = safe_vector_operand (op1, mode1);
12705
12706 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
59bef189
RH
12707
12708 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12709 op1 = copy_to_mode_reg (mode1, op1);
12710
bd793c65
BS
12711 pat = GEN_FCN (icode) (op0, op1);
12712 if (pat)
12713 emit_insn (pat);
12714 return 0;
12715}
12716
12717/* Subroutine of ix86_expand_builtin to take care of unop insns. */
12718
12719static rtx
12720ix86_expand_unop_builtin (icode, arglist, target, do_load)
12721 enum insn_code icode;
12722 tree arglist;
12723 rtx target;
12724 int do_load;
12725{
12726 rtx pat;
12727 tree arg0 = TREE_VALUE (arglist);
12728 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12729 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12730 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12731
12732 if (! target
12733 || GET_MODE (target) != tmode
12734 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12735 target = gen_reg_rtx (tmode);
12736 if (do_load)
12737 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12738 else
12739 {
12740 if (VECTOR_MODE_P (mode0))
12741 op0 = safe_vector_operand (op0, mode0);
12742
12743 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12744 op0 = copy_to_mode_reg (mode0, op0);
12745 }
12746
12747 pat = GEN_FCN (icode) (target, op0);
12748 if (! pat)
12749 return 0;
12750 emit_insn (pat);
12751 return target;
12752}
12753
12754/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12755 sqrtss, rsqrtss, rcpss. */
12756
12757static rtx
12758ix86_expand_unop1_builtin (icode, arglist, target)
12759 enum insn_code icode;
12760 tree arglist;
12761 rtx target;
12762{
12763 rtx pat;
12764 tree arg0 = TREE_VALUE (arglist);
59bef189 12765 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
bd793c65
BS
12766 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12767 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12768
12769 if (! target
12770 || GET_MODE (target) != tmode
12771 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12772 target = gen_reg_rtx (tmode);
12773
12774 if (VECTOR_MODE_P (mode0))
12775 op0 = safe_vector_operand (op0, mode0);
12776
12777 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12778 op0 = copy_to_mode_reg (mode0, op0);
fce5a9f2 12779
59bef189
RH
12780 op1 = op0;
12781 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12782 op1 = copy_to_mode_reg (mode0, op1);
fce5a9f2 12783
59bef189 12784 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
12785 if (! pat)
12786 return 0;
12787 emit_insn (pat);
12788 return target;
12789}
12790
12791/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12792
12793static rtx
12794ix86_expand_sse_compare (d, arglist, target)
8b60264b 12795 const struct builtin_description *d;
bd793c65
BS
12796 tree arglist;
12797 rtx target;
12798{
12799 rtx pat;
12800 tree arg0 = TREE_VALUE (arglist);
12801 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12802 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12803 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12804 rtx op2;
12805 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12806 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12807 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12808 enum rtx_code comparison = d->comparison;
12809
12810 if (VECTOR_MODE_P (mode0))
12811 op0 = safe_vector_operand (op0, mode0);
12812 if (VECTOR_MODE_P (mode1))
12813 op1 = safe_vector_operand (op1, mode1);
12814
12815 /* Swap operands if we have a comparison that isn't available in
12816 hardware. */
12817 if (d->flag)
12818 {
21e1b5f1
BS
12819 rtx tmp = gen_reg_rtx (mode1);
12820 emit_move_insn (tmp, op1);
bd793c65 12821 op1 = op0;
21e1b5f1 12822 op0 = tmp;
bd793c65 12823 }
21e1b5f1
BS
12824
12825 if (! target
12826 || GET_MODE (target) != tmode
12827 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
bd793c65
BS
12828 target = gen_reg_rtx (tmode);
12829
12830 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12831 op0 = copy_to_mode_reg (mode0, op0);
12832 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12833 op1 = copy_to_mode_reg (mode1, op1);
12834
12835 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12836 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12837 if (! pat)
12838 return 0;
12839 emit_insn (pat);
12840 return target;
12841}
12842
12843/* Subroutine of ix86_expand_builtin to take care of comi insns. */
12844
12845static rtx
12846ix86_expand_sse_comi (d, arglist, target)
8b60264b 12847 const struct builtin_description *d;
bd793c65
BS
12848 tree arglist;
12849 rtx target;
12850{
12851 rtx pat;
12852 tree arg0 = TREE_VALUE (arglist);
12853 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12854 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12855 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12856 rtx op2;
12857 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12858 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12859 enum rtx_code comparison = d->comparison;
12860
12861 if (VECTOR_MODE_P (mode0))
12862 op0 = safe_vector_operand (op0, mode0);
12863 if (VECTOR_MODE_P (mode1))
12864 op1 = safe_vector_operand (op1, mode1);
12865
12866 /* Swap operands if we have a comparison that isn't available in
12867 hardware. */
12868 if (d->flag)
12869 {
12870 rtx tmp = op1;
12871 op1 = op0;
12872 op0 = tmp;
bd793c65
BS
12873 }
12874
12875 target = gen_reg_rtx (SImode);
12876 emit_move_insn (target, const0_rtx);
12877 target = gen_rtx_SUBREG (QImode, target, 0);
12878
12879 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12880 op0 = copy_to_mode_reg (mode0, op0);
12881 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12882 op1 = copy_to_mode_reg (mode1, op1);
12883
12884 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
1194ca05 12885 pat = GEN_FCN (d->icode) (op0, op1);
bd793c65
BS
12886 if (! pat)
12887 return 0;
12888 emit_insn (pat);
29628f27
BS
12889 emit_insn (gen_rtx_SET (VOIDmode,
12890 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
12891 gen_rtx_fmt_ee (comparison, QImode,
1194ca05 12892 SET_DEST (pat),
29628f27 12893 const0_rtx)));
bd793c65 12894
6f1a6c5b 12895 return SUBREG_REG (target);
bd793c65
BS
12896}
12897
12898/* Expand an expression EXP that calls a built-in function,
12899 with result going to TARGET if that's convenient
12900 (and in mode MODE if that's convenient).
12901 SUBTARGET may be used as the target for computing one of EXP's operands.
12902 IGNORE is nonzero if the value is to be ignored. */
/* Builtins whose operands need individual treatment are expanded by the
   switch below; every other function code is looked up in bdesc_2arg,
   bdesc_1arg and bdesc_comi, in that order.  The return value is the rtx
   holding the result, or 0 for builtins that produce no value, when a
   pattern could not be generated, or when no table entry matches.  After
   an operand error a placeholder (a fresh pseudo or const0_rtx) is
   returned instead.  */
12903
12904rtx
12905ix86_expand_builtin (exp, target, subtarget, mode, ignore)
12906 tree exp;
12907 rtx target;
12908 rtx subtarget ATTRIBUTE_UNUSED;
12909 enum machine_mode mode ATTRIBUTE_UNUSED;
12910 int ignore ATTRIBUTE_UNUSED;
12911{
8b60264b 12912 const struct builtin_description *d;
77ebd435 12913 size_t i;
bd793c65
BS
12914 enum insn_code icode;
12915 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12916 tree arglist = TREE_OPERAND (exp, 1);
e37af218 12917 tree arg0, arg1, arg2;
bd793c65
BS
12918 rtx op0, op1, op2, pat;
12919 enum machine_mode tmode, mode0, mode1, mode2;
8752c357 12920 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
bd793c65
BS
12921
  /* Function codes that are not matched by any case leave the switch
     through the default label and reach the table searches at the end.  */
12922 switch (fcode)
12923 {
12924 case IX86_BUILTIN_EMMS:
12925 emit_insn (gen_emms ());
12926 return 0;
12927
12928 case IX86_BUILTIN_SFENCE:
12929 emit_insn (gen_sfence ());
12930 return 0;
12931
bd793c65 12932 case IX86_BUILTIN_PEXTRW:
fbe5eb6d
BS
12933 case IX86_BUILTIN_PEXTRW128:
12934 icode = (fcode == IX86_BUILTIN_PEXTRW
12935 ? CODE_FOR_mmx_pextrw
12936 : CODE_FOR_sse2_pextrw);
bd793c65
BS
12937 arg0 = TREE_VALUE (arglist);
12938 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12939 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12940 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12941 tmode = insn_data[icode].operand[0].mode;
12942 mode0 = insn_data[icode].operand[1].mode;
12943 mode1 = insn_data[icode].operand[2].mode;
12944
12945 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12946 op0 = copy_to_mode_reg (mode0, op0);
  /* The selector is never copied into a register: if the predicate
     rejects it, an error is reported and a fresh TMODE pseudo is
     returned as a placeholder.  */
12947 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12948 {
12949 /* @@@ better error message */
12950 error ("selector must be an immediate");
6f1a6c5b 12951 return gen_reg_rtx (tmode);
bd793c65
BS
12952 }
12953 if (target == 0
12954 || GET_MODE (target) != tmode
12955 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12956 target = gen_reg_rtx (tmode);
12957 pat = GEN_FCN (icode) (target, op0, op1);
12958 if (! pat)
12959 return 0;
12960 emit_insn (pat);
12961 return target;
12962
12963 case IX86_BUILTIN_PINSRW:
fbe5eb6d
BS
12964 case IX86_BUILTIN_PINSRW128:
12965 icode = (fcode == IX86_BUILTIN_PINSRW
12966 ? CODE_FOR_mmx_pinsrw
12967 : CODE_FOR_sse2_pinsrw);
bd793c65
BS
12968 arg0 = TREE_VALUE (arglist);
12969 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12970 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12971 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12972 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12973 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12974 tmode = insn_data[icode].operand[0].mode;
12975 mode0 = insn_data[icode].operand[1].mode;
12976 mode1 = insn_data[icode].operand[2].mode;
12977 mode2 = insn_data[icode].operand[3].mode;
12978
12979 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12980 op0 = copy_to_mode_reg (mode0, op0);
12981 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12982 op1 = copy_to_mode_reg (mode1, op1);
12983 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12984 {
12985 /* @@@ better error message */
12986 error ("selector must be an immediate");
12987 return const0_rtx;
12988 }
12989 if (target == 0
12990 || GET_MODE (target) != tmode
12991 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12992 target = gen_reg_rtx (tmode);
12993 pat = GEN_FCN (icode) (target, op0, op1, op2);
12994 if (! pat)
12995 return 0;
12996 emit_insn (pat);
12997 return target;
12998
  /* NOTE(review): IX86_BUILTIN_MASKMOVQ is the only label on this case,
     so the CODE_FOR_sse2_maskmovdqu arm of the conditional below cannot
     be selected from here -- confirm whether a second case label is
     missing.  */
12999 case IX86_BUILTIN_MASKMOVQ:
fbe5eb6d
BS
13000 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13001 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13002 : CODE_FOR_sse2_maskmovdqu);
bd793c65
BS
13003 /* Note the arg order is different from the operand order. */
13004 arg1 = TREE_VALUE (arglist);
13005 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13006 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13007 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13008 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13009 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13010 mode0 = insn_data[icode].operand[0].mode;
13011 mode1 = insn_data[icode].operand[1].mode;
13012 mode2 = insn_data[icode].operand[2].mode;
13013
5c464583 13014 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
bd793c65
BS
13015 op0 = copy_to_mode_reg (mode0, op0);
13016 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13017 op1 = copy_to_mode_reg (mode1, op1);
13018 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13019 op2 = copy_to_mode_reg (mode2, op2);
13020 pat = GEN_FCN (icode) (op0, op1, op2);
13021 if (! pat)
13022 return 0;
13023 emit_insn (pat);
13024 return 0;
13025
13026 case IX86_BUILTIN_SQRTSS:
13027 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13028 case IX86_BUILTIN_RSQRTSS:
13029 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13030 case IX86_BUILTIN_RCPSS:
13031 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13032
13033 case IX86_BUILTIN_LOADAPS:
13034 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13035
13036 case IX86_BUILTIN_LOADUPS:
13037 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13038
13039 case IX86_BUILTIN_STOREAPS:
e37af218 13040 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
bd793c65 13041 case IX86_BUILTIN_STOREUPS:
e37af218 13042 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
bd793c65
BS
13043
13044 case IX86_BUILTIN_LOADSS:
13045 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13046
13047 case IX86_BUILTIN_STORESS:
e37af218 13048 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
bd793c65 13049
0f290768 13050 case IX86_BUILTIN_LOADHPS:
bd793c65 13051 case IX86_BUILTIN_LOADLPS:
fbe5eb6d
BS
13052 case IX86_BUILTIN_LOADHPD:
13053 case IX86_BUILTIN_LOADLPD:
13054 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13055 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13056 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13057 : CODE_FOR_sse2_movlpd);
bd793c65
BS
13058 arg0 = TREE_VALUE (arglist);
13059 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13060 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13061 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13062 tmode = insn_data[icode].operand[0].mode;
13063 mode0 = insn_data[icode].operand[1].mode;
13064 mode1 = insn_data[icode].operand[2].mode;
13065
13066 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13067 op0 = copy_to_mode_reg (mode0, op0);
  /* The second argument is used as an address: it is copied into a
     Pmode register and wrapped in a MEM of the pattern's operand 2
     mode.  */
13068 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13069 if (target == 0
13070 || GET_MODE (target) != tmode
13071 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13072 target = gen_reg_rtx (tmode);
13073 pat = GEN_FCN (icode) (target, op0, op1);
13074 if (! pat)
13075 return 0;
13076 emit_insn (pat);
13077 return target;
0f290768 13078
bd793c65
BS
13079 case IX86_BUILTIN_STOREHPS:
13080 case IX86_BUILTIN_STORELPS:
fbe5eb6d
BS
13081 case IX86_BUILTIN_STOREHPD:
13082 case IX86_BUILTIN_STORELPD:
13083 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13084 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13085 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13086 : CODE_FOR_sse2_movlpd);
bd793c65
BS
13087 arg0 = TREE_VALUE (arglist);
13088 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13089 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13090 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13091 mode0 = insn_data[icode].operand[1].mode;
13092 mode1 = insn_data[icode].operand[2].mode;
13093
  /* For the store forms the first argument is the address.  */
13094 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13095 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13096 op1 = copy_to_mode_reg (mode1, op1);
13097
  /* The same patterns as the load forms are used; the MEM is passed as
     both operand 0 and operand 1.  */
13098 pat = GEN_FCN (icode) (op0, op0, op1);
13099 if (! pat)
13100 return 0;
13101 emit_insn (pat);
13102 return 0;
13103
13104 case IX86_BUILTIN_MOVNTPS:
e37af218 13105 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
bd793c65 13106 case IX86_BUILTIN_MOVNTQ:
e37af218 13107 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
bd793c65
BS
13108
13109 case IX86_BUILTIN_LDMXCSR:
  /* The argument is first stored into an SImode stack slot, and that
     slot is what the ldmxcsr pattern receives.  */
13110 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13111 target = assign_386_stack_local (SImode, 0);
13112 emit_move_insn (target, op0);
13113 emit_insn (gen_ldmxcsr (target));
13114 return 0;
13115
13116 case IX86_BUILTIN_STMXCSR:
  /* The stmxcsr pattern is given an SImode stack slot; the slot is then
     copied into a register for the caller.  */
13117 target = assign_386_stack_local (SImode, 0);
13118 emit_insn (gen_stmxcsr (target));
13119 return copy_to_mode_reg (SImode, target);
13120
bd793c65 13121 case IX86_BUILTIN_SHUFPS:
fbe5eb6d
BS
13122 case IX86_BUILTIN_SHUFPD:
13123 icode = (fcode == IX86_BUILTIN_SHUFPS
13124 ? CODE_FOR_sse_shufps
13125 : CODE_FOR_sse2_shufpd);
bd793c65
BS
13126 arg0 = TREE_VALUE (arglist);
13127 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13128 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13129 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13130 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13131 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13132 tmode = insn_data[icode].operand[0].mode;
13133 mode0 = insn_data[icode].operand[1].mode;
13134 mode1 = insn_data[icode].operand[2].mode;
13135 mode2 = insn_data[icode].operand[3].mode;
13136
13137 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13138 op0 = copy_to_mode_reg (mode0, op0);
13139 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13140 op1 = copy_to_mode_reg (mode1, op1);
13141 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13142 {
13143 /* @@@ better error message */
13144 error ("mask must be an immediate");
6f1a6c5b 13145 return gen_reg_rtx (tmode);
bd793c65
BS
13146 }
13147 if (target == 0
13148 || GET_MODE (target) != tmode
13149 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13150 target = gen_reg_rtx (tmode);
13151 pat = GEN_FCN (icode) (target, op0, op1, op2);
13152 if (! pat)
13153 return 0;
13154 emit_insn (pat);
13155 return target;
13156
13157 case IX86_BUILTIN_PSHUFW:
fbe5eb6d
BS
13158 case IX86_BUILTIN_PSHUFD:
13159 case IX86_BUILTIN_PSHUFHW:
13160 case IX86_BUILTIN_PSHUFLW:
13161 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13162 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13163 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13164 : CODE_FOR_mmx_pshufw);
bd793c65
BS
13165 arg0 = TREE_VALUE (arglist);
13166 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13167 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13168 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13169 tmode = insn_data[icode].operand[0].mode;
29628f27
BS
13170 mode1 = insn_data[icode].operand[1].mode;
13171 mode2 = insn_data[icode].operand[2].mode;
bd793c65 13172
29628f27
BS
13173 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13174 op0 = copy_to_mode_reg (mode1, op0);
13175 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
bd793c65
BS
13176 {
13177 /* @@@ better error message */
13178 error ("mask must be an immediate");
13179 return const0_rtx;
13180 }
13181 if (target == 0
13182 || GET_MODE (target) != tmode
13183 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13184 target = gen_reg_rtx (tmode);
29628f27 13185 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
13186 if (! pat)
13187 return 0;
13188 emit_insn (pat);
13189 return target;
13190
ab3146fd
ZD
13191 case IX86_BUILTIN_PSLLDQI128:
13192 case IX86_BUILTIN_PSRLDQI128:
13193 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13194 : CODE_FOR_sse2_lshrti3);
13195 arg0 = TREE_VALUE (arglist);
13196 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13197 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13198 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13199 tmode = insn_data[icode].operand[0].mode;
13200 mode1 = insn_data[icode].operand[1].mode;
13201 mode2 = insn_data[icode].operand[2].mode;
13202
13203 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13204 {
13205 op0 = copy_to_reg (op0);
13206 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13207 }
13208 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13209 {
13210 error ("shift must be an immediate");
13211 return const0_rtx;
13212 }
  /* The incoming TARGET is ignored here: the result always goes into a
     new V2DImode pseudo, which the pattern sees through a TMODE
     subreg.  */
13213 target = gen_reg_rtx (V2DImode);
13214 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
13215 if (! pat)
13216 return 0;
13217 emit_insn (pat);
13218 return target;
13219
47f339cf
BS
13220 case IX86_BUILTIN_FEMMS:
13221 emit_insn (gen_femms ());
13222 return NULL_RTX;
13223
13224 case IX86_BUILTIN_PAVGUSB:
13225 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13226
13227 case IX86_BUILTIN_PF2ID:
13228 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13229
13230 case IX86_BUILTIN_PFACC:
13231 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13232
13233 case IX86_BUILTIN_PFADD:
13234 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13235
13236 case IX86_BUILTIN_PFCMPEQ:
13237 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13238
13239 case IX86_BUILTIN_PFCMPGE:
13240 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13241
13242 case IX86_BUILTIN_PFCMPGT:
13243 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13244
13245 case IX86_BUILTIN_PFMAX:
13246 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13247
13248 case IX86_BUILTIN_PFMIN:
13249 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13250
13251 case IX86_BUILTIN_PFMUL:
13252 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13253
13254 case IX86_BUILTIN_PFRCP:
13255 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13256
13257 case IX86_BUILTIN_PFRCPIT1:
13258 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13259
13260 case IX86_BUILTIN_PFRCPIT2:
13261 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13262
13263 case IX86_BUILTIN_PFRSQIT1:
13264 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13265
13266 case IX86_BUILTIN_PFRSQRT:
13267 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13268
13269 case IX86_BUILTIN_PFSUB:
13270 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13271
13272 case IX86_BUILTIN_PFSUBR:
13273 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13274
13275 case IX86_BUILTIN_PI2FD:
13276 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13277
13278 case IX86_BUILTIN_PMULHRW:
13279 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13280
47f339cf
BS
13281 case IX86_BUILTIN_PF2IW:
13282 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13283
13284 case IX86_BUILTIN_PFNACC:
13285 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13286
13287 case IX86_BUILTIN_PFPNACC:
13288 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13289
13290 case IX86_BUILTIN_PI2FW:
13291 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13292
13293 case IX86_BUILTIN_PSWAPDSI:
13294 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13295
13296 case IX86_BUILTIN_PSWAPDSF:
13297 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13298
e37af218
RH
13299 case IX86_BUILTIN_SSE_ZERO:
13300 target = gen_reg_rtx (V4SFmode);
13301 emit_insn (gen_sse_clrv4sf (target));
bd793c65
BS
13302 return target;
13303
bd793c65
BS
13304 case IX86_BUILTIN_MMX_ZERO:
13305 target = gen_reg_rtx (DImode);
13306 emit_insn (gen_mmx_clrdi (target));
13307 return target;
13308
fbe5eb6d
BS
13309 case IX86_BUILTIN_SQRTSD:
13310 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13311 case IX86_BUILTIN_LOADAPD:
13312 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13313 case IX86_BUILTIN_LOADUPD:
13314 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13315
13316 case IX86_BUILTIN_STOREAPD:
13317 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13318 case IX86_BUILTIN_STOREUPD:
13319 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13320
13321 case IX86_BUILTIN_LOADSD:
13322 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13323
13324 case IX86_BUILTIN_STORESD:
13325 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13326
13327 case IX86_BUILTIN_SETPD1:
  /* The scalar argument is stored into a DFmode stack slot, loaded from
     there with loadsd, and the result is then run through shufpd with
     selector 0.  */
13328 target = assign_386_stack_local (DFmode, 0);
13329 arg0 = TREE_VALUE (arglist);
13330 emit_move_insn (adjust_address (target, DFmode, 0),
13331 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13332 op0 = gen_reg_rtx (V2DFmode);
13333 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13334 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13335 return op0;
13336
13337 case IX86_BUILTIN_SETPD:
  /* The two arguments are stored at offsets 0 and 8 of a V2DFmode stack
     slot, which is then loaded as a whole with movapd.  */
13338 target = assign_386_stack_local (V2DFmode, 0);
13339 arg0 = TREE_VALUE (arglist);
13340 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13341 emit_move_insn (adjust_address (target, DFmode, 0),
13342 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13343 emit_move_insn (adjust_address (target, DFmode, 8),
13344 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13345 op0 = gen_reg_rtx (V2DFmode);
13346 emit_insn (gen_sse2_movapd (op0, target));
13347 return op0;
13348
13349 case IX86_BUILTIN_LOADRPD:
13350 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13351 gen_reg_rtx (V2DFmode), 1);
13352 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13353 return target;
13354
13355 case IX86_BUILTIN_LOADPD1:
13356 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13357 gen_reg_rtx (V2DFmode), 1);
13358 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13359 return target;
13360
  /* NOTE(review): both of the following expand to the same plain movapd
     store that IX86_BUILTIN_STOREAPD uses, with no shuffle beforehand
     (unlike the LOADPD1 and LOADRPD cases above) -- confirm that this is
     intended.  */
13361 case IX86_BUILTIN_STOREPD1:
13362 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13363 case IX86_BUILTIN_STORERPD:
13364 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13365
48126a97
JH
13366 case IX86_BUILTIN_CLRPD:
13367 target = gen_reg_rtx (V2DFmode);
13368 emit_insn (gen_sse_clrv2df (target));
13369 return target;
13370
fbe5eb6d
BS
13371 case IX86_BUILTIN_MFENCE:
13372 emit_insn (gen_sse2_mfence ());
13373 return 0;
13374 case IX86_BUILTIN_LFENCE:
13375 emit_insn (gen_sse2_lfence ());
13376 return 0;
13377
13378 case IX86_BUILTIN_CLFLUSH:
13379 arg0 = TREE_VALUE (arglist);
13380 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13381 icode = CODE_FOR_sse2_clflush;
1194ca05
JH
13382 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
13383 op0 = copy_to_mode_reg (Pmode, op0);
fbe5eb6d
BS
13384
13385 emit_insn (gen_sse2_clflush (op0));
13386 return 0;
13387
13388 case IX86_BUILTIN_MOVNTPD:
13389 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13390 case IX86_BUILTIN_MOVNTDQ:
916b60b7 13391 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
fbe5eb6d
BS
13392 case IX86_BUILTIN_MOVNTI:
13393 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13394
bd793c65
BS
13395 default:
13396 break;
13397 }
13398
  /* Not handled by the switch: look for FCODE among the two-operand
     builtins first.  */
ca7558fc 13399 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
bd793c65
BS
13400 if (d->code == fcode)
13401 {
13402 /* Compares are treated specially. */
13403 if (d->icode == CODE_FOR_maskcmpv4sf3
13404 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13405 || d->icode == CODE_FOR_maskncmpv4sf3
fbe5eb6d
BS
13406 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13407 || d->icode == CODE_FOR_maskcmpv2df3
13408 || d->icode == CODE_FOR_vmmaskcmpv2df3
13409 || d->icode == CODE_FOR_maskncmpv2df3
13410 || d->icode == CODE_FOR_vmmaskncmpv2df3)
bd793c65
BS
13411 return ix86_expand_sse_compare (d, arglist, target);
13412
13413 return ix86_expand_binop_builtin (d->icode, arglist, target);
13414 }
13415
  /* Then among the one-operand builtins.  */
ca7558fc 13416 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
bd793c65
BS
13417 if (d->code == fcode)
13418 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
0f290768 13419
  /* And finally among the comi builtins.  */
ca7558fc 13420 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
bd793c65
BS
13421 if (d->code == fcode)
13422 return ix86_expand_sse_comi (d, arglist, target);
0f290768 13423
bd793c65
BS
13424 /* @@@ Should really do something sensible here. */
13425 return 0;
bd793c65 13426}
4211a8fb
JH
13427
13428/* Store OPERAND to the memory after reload is completed. This means
f710504c 13429 that we can't easily use assign_stack_local. */
4211a8fb
JH
13430rtx
13431ix86_force_to_memory (mode, operand)
13432 enum machine_mode mode;
13433 rtx operand;
13434{
898d374d 13435 rtx result;
4211a8fb
JH
13436 if (!reload_completed)
13437 abort ();
898d374d
JH
13438 if (TARGET_64BIT && TARGET_RED_ZONE)
13439 {
13440 result = gen_rtx_MEM (mode,
13441 gen_rtx_PLUS (Pmode,
13442 stack_pointer_rtx,
13443 GEN_INT (-RED_ZONE_SIZE)));
13444 emit_move_insn (result, operand);
13445 }
13446 else if (TARGET_64BIT && !TARGET_RED_ZONE)
4211a8fb 13447 {
898d374d 13448 switch (mode)
4211a8fb 13449 {
898d374d
JH
13450 case HImode:
13451 case SImode:
13452 operand = gen_lowpart (DImode, operand);
13453 /* FALLTHRU */
13454 case DImode:
4211a8fb 13455 emit_insn (
898d374d
JH
13456 gen_rtx_SET (VOIDmode,
13457 gen_rtx_MEM (DImode,
13458 gen_rtx_PRE_DEC (DImode,
13459 stack_pointer_rtx)),
13460 operand));
13461 break;
13462 default:
13463 abort ();
13464 }
13465 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13466 }
13467 else
13468 {
13469 switch (mode)
13470 {
13471 case DImode:
13472 {
13473 rtx operands[2];
13474 split_di (&operand, 1, operands, operands + 1);
13475 emit_insn (
13476 gen_rtx_SET (VOIDmode,
13477 gen_rtx_MEM (SImode,
13478 gen_rtx_PRE_DEC (Pmode,
13479 stack_pointer_rtx)),
13480 operands[1]));
13481 emit_insn (
13482 gen_rtx_SET (VOIDmode,
13483 gen_rtx_MEM (SImode,
13484 gen_rtx_PRE_DEC (Pmode,
13485 stack_pointer_rtx)),
13486 operands[0]));
13487 }
13488 break;
13489 case HImode:
13490 /* It is better to store HImodes as SImodes. */
13491 if (!TARGET_PARTIAL_REG_STALL)
13492 operand = gen_lowpart (SImode, operand);
13493 /* FALLTHRU */
13494 case SImode:
4211a8fb 13495 emit_insn (
898d374d
JH
13496 gen_rtx_SET (VOIDmode,
13497 gen_rtx_MEM (GET_MODE (operand),
13498 gen_rtx_PRE_DEC (SImode,
13499 stack_pointer_rtx)),
13500 operand));
13501 break;
13502 default:
13503 abort ();
4211a8fb 13504 }
898d374d 13505 result = gen_rtx_MEM (mode, stack_pointer_rtx);
4211a8fb 13506 }
898d374d 13507 return result;
4211a8fb
JH
13508}
13509
13510/* Free operand from the memory. */
13511void
13512ix86_free_from_memory (mode)
13513 enum machine_mode mode;
13514{
898d374d
JH
13515 if (!TARGET_64BIT || !TARGET_RED_ZONE)
13516 {
13517 int size;
13518
13519 if (mode == DImode || TARGET_64BIT)
13520 size = 8;
13521 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13522 size = 2;
13523 else
13524 size = 4;
13525 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13526 to pop or add instruction if registers are available. */
13527 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13528 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13529 GEN_INT (size))));
13530 }
4211a8fb 13531}
a946dd00 13532
f84aa48a
JH
13533/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13534 QImode must go into class Q_REGS.
13535 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
892a2d68 13536 movdf to do mem-to-mem moves through integer regs. */
f84aa48a
JH
13537enum reg_class
13538ix86_preferred_reload_class (x, class)
13539 rtx x;
13540 enum reg_class class;
13541{
1877be45
JH
13542 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
13543 return NO_REGS;
f84aa48a
JH
13544 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13545 {
13546 /* SSE can't load any constant directly yet. */
13547 if (SSE_CLASS_P (class))
13548 return NO_REGS;
13549 /* Floats can load 0 and 1. */
13550 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13551 {
13552 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13553 if (MAYBE_SSE_CLASS_P (class))
13554 return (reg_class_subset_p (class, GENERAL_REGS)
13555 ? GENERAL_REGS : FLOAT_REGS);
13556 else
13557 return class;
13558 }
13559 /* General regs can load everything. */
13560 if (reg_class_subset_p (class, GENERAL_REGS))
13561 return GENERAL_REGS;
13562 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13563 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13564 return NO_REGS;
13565 }
13566 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13567 return NO_REGS;
13568 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13569 return Q_REGS;
13570 return class;
13571}
13572
13573/* If we are copying between general and FP registers, we need a memory
13574 location. The same is true for SSE and MMX registers.
13575
13576 The macro can't work reliably when one of the CLASSES is class containing
13577 registers from multiple units (SSE, MMX, integer). We avoid this by never
13578 combining those units in single alternative in the machine description.
13579 Ensure that this constraint holds to avoid unexpected surprises.
13580
13581 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13582 enforce these sanity checks. */
13583int
13584ix86_secondary_memory_needed (class1, class2, mode, strict)
13585 enum reg_class class1, class2;
13586 enum machine_mode mode;
13587 int strict;
13588{
13589 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13590 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13591 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13592 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13593 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13594 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13595 {
13596 if (strict)
13597 abort ();
13598 else
13599 return 1;
13600 }
13601 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13602 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13603 && (mode) != SImode)
13604 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13605 && (mode) != SImode));
13606}
13607/* Return the cost of moving data from a register in class CLASS1 to
a4f31c00 13608 one in class CLASS2.
f84aa48a
JH
13609
13610 It is not required that the cost always equal 2 when FROM is the same as TO;
13611 on some machines it is expensive to move between registers if they are not
13612 general registers. */
13613int
13614ix86_register_move_cost (mode, class1, class2)
13615 enum machine_mode mode;
13616 enum reg_class class1, class2;
13617{
13618 /* In case we require secondary memory, compute cost of the store followed
d631b80a
RH
13619 by load. In order to avoid bad register allocation choices, we need
13620 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
13621
f84aa48a
JH
13622 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13623 {
d631b80a
RH
13624 int cost = 1;
13625
13626 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
13627 MEMORY_MOVE_COST (mode, class1, 1));
13628 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
13629 MEMORY_MOVE_COST (mode, class2, 1));
13630
13631 /* In case of copying from general_purpose_register we may emit multiple
13632 stores followed by single load causing memory size mismatch stall.
13633 Count this as arbitarily high cost of 20. */
62415523 13634 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
d631b80a
RH
13635 cost += 20;
13636
13637 /* In the case of FP/MMX moves, the registers actually overlap, and we
13638 have to switch modes in order to treat them differently. */
13639 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
13640 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
13641 cost += 20;
13642
13643 return cost;
f84aa48a 13644 }
d631b80a 13645
92d0fb09 13646 /* Moves between SSE/MMX and integer unit are expensive. */
62415523
JH
13647 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13648 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
fa79946e
JH
13649 return ix86_cost->mmxsse_to_integer;
13650 if (MAYBE_FLOAT_CLASS_P (class1))
13651 return ix86_cost->fp_move;
13652 if (MAYBE_SSE_CLASS_P (class1))
13653 return ix86_cost->sse_move;
13654 if (MAYBE_MMX_CLASS_P (class1))
13655 return ix86_cost->mmx_move;
f84aa48a
JH
13656 return 2;
13657}
13658
a946dd00
JH
13659/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13660int
13661ix86_hard_regno_mode_ok (regno, mode)
13662 int regno;
13663 enum machine_mode mode;
13664{
13665 /* Flags and only flags can only hold CCmode values. */
13666 if (CC_REGNO_P (regno))
13667 return GET_MODE_CLASS (mode) == MODE_CC;
13668 if (GET_MODE_CLASS (mode) == MODE_CC
13669 || GET_MODE_CLASS (mode) == MODE_RANDOM
13670 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13671 return 0;
13672 if (FP_REGNO_P (regno))
13673 return VALID_FP_MODE_P (mode);
13674 if (SSE_REGNO_P (regno))
13675 return VALID_SSE_REG_MODE (mode);
13676 if (MMX_REGNO_P (regno))
47f339cf 13677 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
a946dd00
JH
13678 /* We handle both integer and floats in the general purpose registers.
13679 In future we should be able to handle vector modes as well. */
13680 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13681 return 0;
13682 /* Take care for QImode values - they can be in non-QI regs, but then
13683 they do cause partial register stalls. */
d2836273 13684 if (regno < 4 || mode != QImode || TARGET_64BIT)
a946dd00
JH
13685 return 1;
13686 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13687}
fa79946e
JH
13688
13689/* Return the cost of moving data of mode M between a
13690 register and memory. A value of 2 is the default; this cost is
13691 relative to those in `REGISTER_MOVE_COST'.
13692
13693 If moving between registers and memory is more expensive than
13694 between two registers, you should define this macro to express the
a4f31c00
AJ
13695 relative cost.
13696
fa79946e
JH
13697 Model also increased moving costs of QImode registers in non
13698 Q_REGS classes.
13699 */
13700int
13701ix86_memory_move_cost (mode, class, in)
13702 enum machine_mode mode;
13703 enum reg_class class;
13704 int in;
13705{
13706 if (FLOAT_CLASS_P (class))
13707 {
13708 int index;
13709 switch (mode)
13710 {
13711 case SFmode:
13712 index = 0;
13713 break;
13714 case DFmode:
13715 index = 1;
13716 break;
13717 case XFmode:
13718 case TFmode:
13719 index = 2;
13720 break;
13721 default:
13722 return 100;
13723 }
13724 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13725 }
13726 if (SSE_CLASS_P (class))
13727 {
13728 int index;
13729 switch (GET_MODE_SIZE (mode))
13730 {
13731 case 4:
13732 index = 0;
13733 break;
13734 case 8:
13735 index = 1;
13736 break;
13737 case 16:
13738 index = 2;
13739 break;
13740 default:
13741 return 100;
13742 }
13743 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
13744 }
13745 if (MMX_CLASS_P (class))
13746 {
13747 int index;
13748 switch (GET_MODE_SIZE (mode))
13749 {
13750 case 4:
13751 index = 0;
13752 break;
13753 case 8:
13754 index = 1;
13755 break;
13756 default:
13757 return 100;
13758 }
13759 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
13760 }
13761 switch (GET_MODE_SIZE (mode))
13762 {
13763 case 1:
13764 if (in)
13765 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13766 : ix86_cost->movzbl_load);
13767 else
13768 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13769 : ix86_cost->int_store[0] + 4);
13770 break;
13771 case 2:
13772 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13773 default:
13774 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
13775 if (mode == TFmode)
13776 mode = XFmode;
3bb7e126 13777 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
d09e61b9
JH
13778 * ((int) GET_MODE_SIZE (mode)
13779 + UNITS_PER_WORD -1 ) / UNITS_PER_WORD);
fa79946e
JH
13780 }
13781}
0ecf09f9 13782
#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
/* Record SYMBOL as a constructor: after calling init_section (),
   write a "pushl $SYMBOL" line to asm_out_file.  PRIORITY is not
   used.  */
static void
ix86_svr3_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority ATTRIBUTE_UNUSED;
{
  const char *const ctor_name = XSTR (symbol, 0);

  init_section ();

  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, ctor_name);
  fputc ('\n', asm_out_file);
}
#endif
162f023b 13795
b069de3b
SS
#if TARGET_MACHO

/* Counter used to give each stub's local labels a unique number.  */
static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.

   FILE is the stream the stub text is printed to, SYMB is the name of
   the symbol the stub stands for and STUB is the label of the stub
   itself.  Three pieces are emitted: the stub proper (an indirect
   jump through the lazy pointer), the binder entry that pushes the
   address of the lazy pointer and jumps to dyld_stub_binding_helper,
   and the lazy pointer itself, initialized to the binder entry.  */

void
machopic_output_stub (file, symb, stub)
     FILE *file;
     const char *symb, *stub;
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      /* Materialize the address of LPC$<label> in %eax, load the lazy
         pointer relative to it and jump through it.  */
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      /* A jump through a register is an indirect jump and needs the
         `*' prefix in AT&T syntax, just like the non-PIC form below.  */
      fprintf (file, "\tjmp *%%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      /* %eax still holds the address of LPC$<label> here.  */
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
#endif /* TARGET_MACHO */
13860
162f023b
JH
13861/* Order the registers for register allocator. */
13862
13863void
13864x86_order_regs_for_local_alloc ()
13865{
13866 int pos = 0;
13867 int i;
13868
13869 /* First allocate the local general purpose registers. */
13870 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13871 if (GENERAL_REGNO_P (i) && call_used_regs[i])
13872 reg_alloc_order [pos++] = i;
13873
13874 /* Global general purpose registers. */
13875 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13876 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
13877 reg_alloc_order [pos++] = i;
13878
13879 /* x87 registers come first in case we are doing FP math
13880 using them. */
13881 if (!TARGET_SSE_MATH)
13882 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13883 reg_alloc_order [pos++] = i;
fce5a9f2 13884
162f023b
JH
13885 /* SSE registers. */
13886 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
13887 reg_alloc_order [pos++] = i;
13888 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
13889 reg_alloc_order [pos++] = i;
13890
13891 /* x87 registerts. */
13892 if (TARGET_SSE_MATH)
13893 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13894 reg_alloc_order [pos++] = i;
13895
13896 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
13897 reg_alloc_order [pos++] = i;
13898
13899 /* Initialize the rest of array as we do not allocate some registers
13900 at all. */
13901 while (pos < FIRST_PSEUDO_REGISTER)
13902 reg_alloc_order [pos++] = 0;
13903}
194734e9 13904
483ab821
MM
13905/* Returns an expression indicating where the this parameter is
13906 located on entry to the FUNCTION. */
13907
13908static rtx
13909ia32_this_parameter (function)
13910 tree function;
13911{
13912 tree type = TREE_TYPE (function);
13913
13914 if (ix86_fntype_regparm (type) > 0)
13915 {
13916 tree parm;
13917
13918 parm = TYPE_ARG_TYPES (type);
13919 /* Figure out whether or not the function has a variable number of
13920 arguments. */
13921 for (; parm; parm = TREE_CHAIN (parm))\
13922 if (TREE_VALUE (parm) == void_type_node)
13923 break;
13924 /* If not, the this parameter is in %eax. */
13925 if (parm)
13926 return gen_rtx_REG (SImode, 0);
13927 }
13928
13929 if (aggregate_value_p (TREE_TYPE (type)))
13930 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
13931 else
13932 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
13933}
13934
13935
194734e9 13936void
483ab821 13937x86_output_mi_vcall_thunk (file, thunk, delta, vcall_index, function)
194734e9 13938 FILE *file;
483ab821 13939 tree thunk ATTRIBUTE_UNUSED;
eb0424da
MM
13940 HOST_WIDE_INT delta;
13941 HOST_WIDE_INT vcall_index;
194734e9
JH
13942 tree function;
13943{
194734e9
JH
13944 rtx xops[3];
13945
194734e9
JH
13946 if (TARGET_64BIT)
13947 {
13948 int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
483ab821 13949 xops[0] = GEN_INT (delta);
194734e9
JH
13950 xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
13951 output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
13952 if (flag_pic)
13953 {
13954 fprintf (file, "\tjmp *");
13955 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13956 fprintf (file, "@GOTPCREL(%%rip)\n");
13957 }
13958 else
13959 {
13960 fprintf (file, "\tjmp ");
13961 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13962 fprintf (file, "\n");
13963 }
13964 }
13965 else
13966 {
483ab821
MM
13967 /* Adjust the this parameter by a fixed constant. */
13968 if (delta)
13969 {
13970 xops[0] = GEN_INT (delta);
13971 xops[1] = ia32_this_parameter (function);
13972 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
13973 }
13974
13975 /* Adjust the this parameter by a value stored in the vtable. */
13976 if (vcall_index)
13977 {
13978 rtx this_parm;
13979
13980 /* Put the this parameter into %eax. */
13981 this_parm = ia32_this_parameter (function);
13982 if (!REG_P (this_parm))
13983 {
13984 xops[0] = this_parm;
13985 xops[1] = gen_rtx_REG (Pmode, 0);
13986 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
13987 }
13988 /* Load the virtual table pointer into %edx. */
13989 if (ix86_fntype_regparm (TREE_TYPE (function)) > 2)
13990 error ("virtual function `%D' cannot have more than two register parameters",
13991 function);
13992 xops[0] = gen_rtx_MEM (Pmode,
13993 gen_rtx_REG (Pmode, 0));
13994 xops[1] = gen_rtx_REG (Pmode, 1);
13995 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
13996 /* Adjust the this parameter. */
13997 xops[0] = gen_rtx_MEM (SImode,
13998 plus_constant (gen_rtx_REG (Pmode, 1),
13999 vcall_index));
14000 xops[1] = gen_rtx_REG (Pmode, 0);
14001 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14002 /* Put the this parameter back where it came from. */
14003 if (!REG_P (this_parm))
14004 {
14005 xops[0] = gen_rtx_REG (Pmode, 0);
14006 xops[1] = ia32_this_parameter (function);
14007 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14008 }
14009 }
194734e9
JH
14010
14011 if (flag_pic)
14012 {
14013 xops[0] = pic_offset_table_rtx;
14014 xops[1] = gen_label_rtx ();
5fc0e5df 14015 xops[2] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
194734e9
JH
14016
14017 if (ix86_regparm > 2)
14018 abort ();
14019 output_asm_insn ("push{l}\t%0", xops);
14020 output_asm_insn ("call\t%P1", xops);
14021 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
14022 output_asm_insn ("pop{l}\t%0", xops);
14023 output_asm_insn
14024 ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
14025 xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
14026 output_asm_insn
14027 ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
14028 asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
14029 asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
14030 }
14031 else
14032 {
483ab821 14033 fprintf (file, "\tjmp\t");
194734e9
JH
14034 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
14035 fprintf (file, "\n");
14036 }
14037 }
14038}
e2500fed 14039
483ab821
MM
14040void
14041x86_output_mi_thunk (file, thunk, delta, function)
14042 FILE *file;
14043 tree thunk;
eb0424da 14044 HOST_WIDE_INT delta;
483ab821
MM
14045 tree function;
14046{
14047 x86_output_mi_vcall_thunk (file, thunk, delta, /*vcall_index=*/0,
14048 function);
14049}
14050
e932b21b
JH
14051int
14052x86_field_alignment (field, computed)
14053 tree field;
14054 int computed;
14055{
14056 enum machine_mode mode;
ad9335eb
JJ
14057 tree type = TREE_TYPE (field);
14058
14059 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
e932b21b 14060 return computed;
ad9335eb
JJ
14061 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
14062 ? get_inner_array_type (type) : type);
39e3a681
JJ
14063 if (mode == DFmode || mode == DCmode
14064 || GET_MODE_CLASS (mode) == MODE_INT
14065 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
e932b21b
JH
14066 return MIN (32, computed);
14067 return computed;
14068}
14069
2a500b9e
JH
14070/* Implement machine specific optimizations.
14071 At the moment we implement single transformation: AMD Athlon works faster
14072 when RET is not destination of conditional jump or directly preceeded
14073 by other jump instruction. We avoid the penalty by inserting NOP just
14074 before the RET instructions in such cases. */
14075void
14076x86_machine_dependent_reorg (first)
14077 rtx first ATTRIBUTE_UNUSED;
14078{
14079 edge e;
14080
14081 if (!TARGET_ATHLON || !optimize || optimize_size)
14082 return;
14083 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
14084 {
14085 basic_block bb = e->src;
14086 rtx ret = bb->end;
14087 rtx prev;
14088 bool insert = false;
14089
14090 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
14091 continue;
14092 prev = prev_nonnote_insn (ret);
14093 if (prev && GET_CODE (prev) == CODE_LABEL)
14094 {
14095 edge e;
14096 for (e = bb->pred; e; e = e->pred_next)
14097 if (EDGE_FREQUENCY (e) && e->src->index > 0
14098 && !(e->flags & EDGE_FALLTHRU))
14099 insert = 1;
14100 }
14101 if (!insert)
14102 {
14103 prev = prev_real_insn (ret);
14104 if (prev && GET_CODE (prev) == JUMP_INSN
14105 && any_condjump_p (prev))
14106 insert = 1;
14107 }
14108 if (insert)
14109 emit_insn_before (gen_nop (), ret);
14110 }
14111}
14112
e2500fed 14113#include "gt-i386.h"
This page took 3.514262 seconds and 5 git commands to generate.