]> gcc.gnu.org Git - gcc.git/blame - gcc/config/i386/i386.c
pt.c (convert_template_argument): Revert this change: 2002-10-16 Mark Mitchell <mark...
[gcc.git] / gcc / config / i386 / i386.c
CommitLineData
e075ae69 1/* Subroutines used for code generation on IA-32.
07933f72
GS
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
2a2ab3f9
JVA
4
5This file is part of GNU CC.
6
7GNU CC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
12GNU CC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU CC; see the file COPYING. If not, write to
97aadbb9 19the Free Software Foundation, 59 Temple Place - Suite 330,
0f290768 20Boston, MA 02111-1307, USA. */
2a2ab3f9 21
2a2ab3f9 22#include "config.h"
bb5177ac 23#include "system.h"
2a2ab3f9 24#include "rtl.h"
6baf1cc8
BS
25#include "tree.h"
26#include "tm_p.h"
2a2ab3f9
JVA
27#include "regs.h"
28#include "hard-reg-set.h"
29#include "real.h"
30#include "insn-config.h"
31#include "conditions.h"
2a2ab3f9
JVA
32#include "output.h"
33#include "insn-attr.h"
2a2ab3f9 34#include "flags.h"
a8ffcc81 35#include "except.h"
ecbc4695 36#include "function.h"
00c79232 37#include "recog.h"
ced8dd8c 38#include "expr.h"
e78d8e51 39#include "optabs.h"
f103890b 40#include "toplev.h"
e075ae69 41#include "basic-block.h"
1526a060 42#include "ggc.h"
672a6f42
NB
43#include "target.h"
44#include "target-def.h"
f1e639b1 45#include "langhooks.h"
2a2ab3f9 46
/* Fallback stack-probe limit; -1 means no explicit limit unless the
   target configuration headers define one.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
2ab0437e 51/* Processor costs (relative to an add) */
fce5a9f2 52static const
2ab0437e
JH
53struct processor_costs size_cost = { /* costs for tunning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
44cf5b6a
JH
61 3, /* cost of movsx */
62 3, /* cost of movzx */
2ab0437e
JH
63 0, /* "large" insn */
64 2, /* MOVE_RATIO */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of loading integer registers */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
f4365627
JH
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
229b303a
RS
87 2, /* cost of FADD and FSUB insns. */
88 2, /* cost of FMUL instruction. */
89 2, /* cost of FDIV instruction. */
90 2, /* cost of FABS instruction. */
91 2, /* cost of FCHS instruction. */
92 2, /* cost of FSQRT instruction. */
2ab0437e 93};
229b303a 94
32b5b1aa 95/* Processor costs (relative to an add) */
fce5a9f2 96static const
32b5b1aa 97struct processor_costs i386_cost = { /* 386 specific costs */
e9a25f70 98 1, /* cost of an add instruction */
32b5b1aa
SC
99 1, /* cost of a lea instruction */
100 3, /* variable shift costs */
101 2, /* constant shift costs */
102 6, /* cost of starting a multiply */
103 1, /* cost of multiply per each bit set */
e075ae69 104 23, /* cost of a divide/mod */
44cf5b6a
JH
105 3, /* cost of movsx */
106 2, /* cost of movzx */
96e7ae40 107 15, /* "large" insn */
e2e52e1b 108 3, /* MOVE_RATIO */
7c6b971d 109 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
110 {2, 4, 2}, /* cost of loading integer registers
111 in QImode, HImode and SImode.
0f290768 112 Relative to reg-reg move (2). */
96e7ae40
JH
113 {2, 4, 2}, /* cost of storing integer registers */
114 2, /* cost of reg,reg fld/fst */
115 {8, 8, 8}, /* cost of loading fp registers
116 in SFmode, DFmode and XFmode */
fa79946e
JH
117 {8, 8, 8}, /* cost of loading integer registers */
118 2, /* cost of moving MMX register */
119 {4, 8}, /* cost of loading MMX registers
120 in SImode and DImode */
121 {4, 8}, /* cost of storing MMX registers
122 in SImode and DImode */
123 2, /* cost of moving SSE register */
124 {4, 8, 16}, /* cost of loading SSE registers
125 in SImode, DImode and TImode */
126 {4, 8, 16}, /* cost of storing SSE registers
127 in SImode, DImode and TImode */
128 3, /* MMX or SSE register to integer */
f4365627
JH
129 0, /* size of prefetch block */
130 0, /* number of parallel prefetches */
229b303a
RS
131 23, /* cost of FADD and FSUB insns. */
132 27, /* cost of FMUL instruction. */
133 88, /* cost of FDIV instruction. */
134 22, /* cost of FABS instruction. */
135 24, /* cost of FCHS instruction. */
136 122, /* cost of FSQRT instruction. */
32b5b1aa
SC
137};
138
fce5a9f2 139static const
32b5b1aa
SC
140struct processor_costs i486_cost = { /* 486 specific costs */
141 1, /* cost of an add instruction */
142 1, /* cost of a lea instruction */
143 3, /* variable shift costs */
144 2, /* constant shift costs */
145 12, /* cost of starting a multiply */
146 1, /* cost of multiply per each bit set */
e075ae69 147 40, /* cost of a divide/mod */
44cf5b6a
JH
148 3, /* cost of movsx */
149 2, /* cost of movzx */
96e7ae40 150 15, /* "large" insn */
e2e52e1b 151 3, /* MOVE_RATIO */
7c6b971d 152 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
0f290768 155 Relative to reg-reg move (2). */
96e7ae40
JH
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
fa79946e
JH
160 {8, 8, 8}, /* cost of loading integer registers */
161 2, /* cost of moving MMX register */
162 {4, 8}, /* cost of loading MMX registers
163 in SImode and DImode */
164 {4, 8}, /* cost of storing MMX registers
165 in SImode and DImode */
166 2, /* cost of moving SSE register */
167 {4, 8, 16}, /* cost of loading SSE registers
168 in SImode, DImode and TImode */
169 {4, 8, 16}, /* cost of storing SSE registers
170 in SImode, DImode and TImode */
f4365627
JH
171 3, /* MMX or SSE register to integer */
172 0, /* size of prefetch block */
173 0, /* number of parallel prefetches */
229b303a
RS
174 8, /* cost of FADD and FSUB insns. */
175 16, /* cost of FMUL instruction. */
176 73, /* cost of FDIV instruction. */
177 3, /* cost of FABS instruction. */
178 3, /* cost of FCHS instruction. */
179 83, /* cost of FSQRT instruction. */
32b5b1aa
SC
180};
181
fce5a9f2 182static const
e5cb57e8 183struct processor_costs pentium_cost = {
32b5b1aa
SC
184 1, /* cost of an add instruction */
185 1, /* cost of a lea instruction */
856b07a1 186 4, /* variable shift costs */
e5cb57e8 187 1, /* constant shift costs */
856b07a1
SC
188 11, /* cost of starting a multiply */
189 0, /* cost of multiply per each bit set */
e075ae69 190 25, /* cost of a divide/mod */
44cf5b6a
JH
191 3, /* cost of movsx */
192 2, /* cost of movzx */
96e7ae40 193 8, /* "large" insn */
e2e52e1b 194 6, /* MOVE_RATIO */
7c6b971d 195 6, /* cost for loading QImode using movzbl */
96e7ae40
JH
196 {2, 4, 2}, /* cost of loading integer registers
197 in QImode, HImode and SImode.
0f290768 198 Relative to reg-reg move (2). */
96e7ae40
JH
199 {2, 4, 2}, /* cost of storing integer registers */
200 2, /* cost of reg,reg fld/fst */
201 {2, 2, 6}, /* cost of loading fp registers
202 in SFmode, DFmode and XFmode */
fa79946e
JH
203 {4, 4, 6}, /* cost of loading integer registers */
204 8, /* cost of moving MMX register */
205 {8, 8}, /* cost of loading MMX registers
206 in SImode and DImode */
207 {8, 8}, /* cost of storing MMX registers
208 in SImode and DImode */
209 2, /* cost of moving SSE register */
210 {4, 8, 16}, /* cost of loading SSE registers
211 in SImode, DImode and TImode */
212 {4, 8, 16}, /* cost of storing SSE registers
213 in SImode, DImode and TImode */
f4365627
JH
214 3, /* MMX or SSE register to integer */
215 0, /* size of prefetch block */
216 0, /* number of parallel prefetches */
229b303a
RS
217 3, /* cost of FADD and FSUB insns. */
218 3, /* cost of FMUL instruction. */
219 39, /* cost of FDIV instruction. */
220 1, /* cost of FABS instruction. */
221 1, /* cost of FCHS instruction. */
222 70, /* cost of FSQRT instruction. */
32b5b1aa
SC
223};
224
fce5a9f2 225static const
856b07a1
SC
226struct processor_costs pentiumpro_cost = {
227 1, /* cost of an add instruction */
228 1, /* cost of a lea instruction */
e075ae69 229 1, /* variable shift costs */
856b07a1 230 1, /* constant shift costs */
369e59b1 231 4, /* cost of starting a multiply */
856b07a1 232 0, /* cost of multiply per each bit set */
e075ae69 233 17, /* cost of a divide/mod */
44cf5b6a
JH
234 1, /* cost of movsx */
235 1, /* cost of movzx */
96e7ae40 236 8, /* "large" insn */
e2e52e1b 237 6, /* MOVE_RATIO */
7c6b971d 238 2, /* cost for loading QImode using movzbl */
96e7ae40
JH
239 {4, 4, 4}, /* cost of loading integer registers
240 in QImode, HImode and SImode.
0f290768 241 Relative to reg-reg move (2). */
96e7ae40
JH
242 {2, 2, 2}, /* cost of storing integer registers */
243 2, /* cost of reg,reg fld/fst */
244 {2, 2, 6}, /* cost of loading fp registers
245 in SFmode, DFmode and XFmode */
fa79946e
JH
246 {4, 4, 6}, /* cost of loading integer registers */
247 2, /* cost of moving MMX register */
248 {2, 2}, /* cost of loading MMX registers
249 in SImode and DImode */
250 {2, 2}, /* cost of storing MMX registers
251 in SImode and DImode */
252 2, /* cost of moving SSE register */
253 {2, 2, 8}, /* cost of loading SSE registers
254 in SImode, DImode and TImode */
255 {2, 2, 8}, /* cost of storing SSE registers
256 in SImode, DImode and TImode */
f4365627
JH
257 3, /* MMX or SSE register to integer */
258 32, /* size of prefetch block */
259 6, /* number of parallel prefetches */
229b303a
RS
260 3, /* cost of FADD and FSUB insns. */
261 5, /* cost of FMUL instruction. */
262 56, /* cost of FDIV instruction. */
263 2, /* cost of FABS instruction. */
264 2, /* cost of FCHS instruction. */
265 56, /* cost of FSQRT instruction. */
856b07a1
SC
266};
267
fce5a9f2 268static const
a269a03c
JC
269struct processor_costs k6_cost = {
270 1, /* cost of an add instruction */
e075ae69 271 2, /* cost of a lea instruction */
a269a03c
JC
272 1, /* variable shift costs */
273 1, /* constant shift costs */
73fe76e4 274 3, /* cost of starting a multiply */
a269a03c 275 0, /* cost of multiply per each bit set */
e075ae69 276 18, /* cost of a divide/mod */
44cf5b6a
JH
277 2, /* cost of movsx */
278 2, /* cost of movzx */
96e7ae40 279 8, /* "large" insn */
e2e52e1b 280 4, /* MOVE_RATIO */
7c6b971d 281 3, /* cost for loading QImode using movzbl */
96e7ae40
JH
282 {4, 5, 4}, /* cost of loading integer registers
283 in QImode, HImode and SImode.
0f290768 284 Relative to reg-reg move (2). */
96e7ae40
JH
285 {2, 3, 2}, /* cost of storing integer registers */
286 4, /* cost of reg,reg fld/fst */
287 {6, 6, 6}, /* cost of loading fp registers
288 in SFmode, DFmode and XFmode */
fa79946e
JH
289 {4, 4, 4}, /* cost of loading integer registers */
290 2, /* cost of moving MMX register */
291 {2, 2}, /* cost of loading MMX registers
292 in SImode and DImode */
293 {2, 2}, /* cost of storing MMX registers
294 in SImode and DImode */
295 2, /* cost of moving SSE register */
296 {2, 2, 8}, /* cost of loading SSE registers
297 in SImode, DImode and TImode */
298 {2, 2, 8}, /* cost of storing SSE registers
299 in SImode, DImode and TImode */
f4365627
JH
300 6, /* MMX or SSE register to integer */
301 32, /* size of prefetch block */
302 1, /* number of parallel prefetches */
229b303a
RS
303 2, /* cost of FADD and FSUB insns. */
304 2, /* cost of FMUL instruction. */
4f770e7b
RS
305 56, /* cost of FDIV instruction. */
306 2, /* cost of FABS instruction. */
229b303a
RS
307 2, /* cost of FCHS instruction. */
308 56, /* cost of FSQRT instruction. */
a269a03c
JC
309};
310
fce5a9f2 311static const
309ada50
JH
312struct processor_costs athlon_cost = {
313 1, /* cost of an add instruction */
0b5107cf 314 2, /* cost of a lea instruction */
309ada50
JH
315 1, /* variable shift costs */
316 1, /* constant shift costs */
317 5, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
0b5107cf 319 42, /* cost of a divide/mod */
44cf5b6a
JH
320 1, /* cost of movsx */
321 1, /* cost of movzx */
309ada50 322 8, /* "large" insn */
e2e52e1b 323 9, /* MOVE_RATIO */
309ada50 324 4, /* cost for loading QImode using movzbl */
b72b1c29 325 {3, 4, 3}, /* cost of loading integer registers
309ada50 326 in QImode, HImode and SImode.
0f290768 327 Relative to reg-reg move (2). */
b72b1c29 328 {3, 4, 3}, /* cost of storing integer registers */
309ada50 329 4, /* cost of reg,reg fld/fst */
b72b1c29 330 {4, 4, 12}, /* cost of loading fp registers
309ada50 331 in SFmode, DFmode and XFmode */
b72b1c29 332 {6, 6, 8}, /* cost of loading integer registers */
fa79946e 333 2, /* cost of moving MMX register */
b72b1c29 334 {4, 4}, /* cost of loading MMX registers
fa79946e 335 in SImode and DImode */
b72b1c29 336 {4, 4}, /* cost of storing MMX registers
fa79946e
JH
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
b72b1c29 339 {4, 4, 6}, /* cost of loading SSE registers
fa79946e 340 in SImode, DImode and TImode */
b72b1c29 341 {4, 4, 5}, /* cost of storing SSE registers
fa79946e 342 in SImode, DImode and TImode */
b72b1c29 343 5, /* MMX or SSE register to integer */
f4365627
JH
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
229b303a
RS
346 4, /* cost of FADD and FSUB insns. */
347 4, /* cost of FMUL instruction. */
348 24, /* cost of FDIV instruction. */
349 2, /* cost of FABS instruction. */
350 2, /* cost of FCHS instruction. */
351 35, /* cost of FSQRT instruction. */
309ada50
JH
352};
353
fce5a9f2 354static const
b4e89e2d
JH
355struct processor_costs pentium4_cost = {
356 1, /* cost of an add instruction */
357 1, /* cost of a lea instruction */
358 8, /* variable shift costs */
359 8, /* constant shift costs */
360 30, /* cost of starting a multiply */
361 0, /* cost of multiply per each bit set */
362 112, /* cost of a divide/mod */
44cf5b6a
JH
363 1, /* cost of movsx */
364 1, /* cost of movzx */
b4e89e2d
JH
365 16, /* "large" insn */
366 6, /* MOVE_RATIO */
367 2, /* cost for loading QImode using movzbl */
368 {4, 5, 4}, /* cost of loading integer registers
369 in QImode, HImode and SImode.
370 Relative to reg-reg move (2). */
371 {2, 3, 2}, /* cost of storing integer registers */
372 2, /* cost of reg,reg fld/fst */
373 {2, 2, 6}, /* cost of loading fp registers
374 in SFmode, DFmode and XFmode */
375 {4, 4, 6}, /* cost of loading integer registers */
376 2, /* cost of moving MMX register */
377 {2, 2}, /* cost of loading MMX registers
378 in SImode and DImode */
379 {2, 2}, /* cost of storing MMX registers
380 in SImode and DImode */
381 12, /* cost of moving SSE register */
382 {12, 12, 12}, /* cost of loading SSE registers
383 in SImode, DImode and TImode */
384 {2, 2, 8}, /* cost of storing SSE registers
385 in SImode, DImode and TImode */
386 10, /* MMX or SSE register to integer */
f4365627
JH
387 64, /* size of prefetch block */
388 6, /* number of parallel prefetches */
229b303a
RS
389 5, /* cost of FADD and FSUB insns. */
390 7, /* cost of FMUL instruction. */
391 43, /* cost of FDIV instruction. */
392 2, /* cost of FABS instruction. */
393 2, /* cost of FCHS instruction. */
394 43, /* cost of FSQRT instruction. */
b4e89e2d
JH
395};
396
8b60264b 397const struct processor_costs *ix86_cost = &pentium_cost;
32b5b1aa 398
a269a03c
JC
399/* Processor feature/optimization bitmasks. */
400#define m_386 (1<<PROCESSOR_I386)
401#define m_486 (1<<PROCESSOR_I486)
402#define m_PENT (1<<PROCESSOR_PENTIUM)
403#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
404#define m_K6 (1<<PROCESSOR_K6)
309ada50 405#define m_ATHLON (1<<PROCESSOR_ATHLON)
b4e89e2d 406#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
a269a03c 407
309ada50 408const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
b4e89e2d 409const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
a269a03c 410const int x86_zero_extend_with_and = m_486 | m_PENT;
b4e89e2d 411const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
e075ae69 412const int x86_double_with_add = ~m_386;
a269a03c 413const int x86_use_bit_test = m_386;
e2e52e1b 414const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
b4e89e2d 415const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
47f339cf 416const int x86_3dnow_a = m_ATHLON;
b4e89e2d 417const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
ef6257cd 418const int x86_branch_hints = m_PENT4;
b4e89e2d 419const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
e075ae69
RH
420const int x86_partial_reg_stall = m_PPRO;
421const int x86_use_loop = m_K6;
309ada50 422const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
e075ae69
RH
423const int x86_use_mov0 = m_K6;
424const int x86_use_cltd = ~(m_PENT | m_K6);
425const int x86_read_modify_write = ~m_PENT;
426const int x86_read_modify = ~(m_PENT | m_PPRO);
427const int x86_split_long_moves = m_PPRO;
285464d0
JH
428const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
429const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
b4e89e2d 430const int x86_single_stringop = m_386 | m_PENT4;
d9f32422
JH
431const int x86_qimode_math = ~(0);
432const int x86_promote_qi_regs = 0;
433const int x86_himode_math = ~(m_PPRO);
434const int x86_promote_hi_regs = m_PPRO;
b4e89e2d
JH
435const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
436const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
437const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
438const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
77966be3 439const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
b4e89e2d
JH
440const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
441const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
c6036a37
JH
442const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
443const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
444const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
b972dd02 445const int x86_decompose_lea = m_PENT4;
495333a6 446const int x86_shift1 = ~m_486;
285464d0 447const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
a269a03c 448
6ab16dd9
JH
449/* In case the avreage insn count for single function invocation is
450 lower than this constant, emit fast (but longer) prologue and
451 epilogue code. */
452#define FAST_PROLOGUE_INSN_COUNT 30
5bf0ebab 453
6ab16dd9
JH
454/* Set by prologue expander and used by epilogue expander to determine
455 the style used. */
456static int use_fast_prologue_epilogue;
457
5bf0ebab
RH
458/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
459static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
460static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
461static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
4c0d89b5
RS
462
463/* Array of the smallest class containing reg number REGNO, indexed by
0f290768 464 REGNO. Used by REGNO_REG_CLASS in i386.h. */
4c0d89b5 465
e075ae69 466enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
4c0d89b5
RS
467{
468 /* ax, dx, cx, bx */
ab408a86 469 AREG, DREG, CREG, BREG,
4c0d89b5 470 /* si, di, bp, sp */
e075ae69 471 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
4c0d89b5
RS
472 /* FP registers */
473 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
79325812 474 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
4c0d89b5 475 /* arg pointer */
83774849 476 NON_Q_REGS,
564d80f4 477 /* flags, fpsr, dirflag, frame */
a7180f70
BS
478 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
479 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
480 SSE_REGS, SSE_REGS,
481 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
3d117b30
JH
482 MMX_REGS, MMX_REGS,
483 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
484 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
485 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
486 SSE_REGS, SSE_REGS,
4c0d89b5 487};
c572e5ba 488
3d117b30 489/* The "default" register map used in 32bit mode. */
83774849 490
0f290768 491int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
492{
493 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
494 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
3d117b30 495 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
496 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
497 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
3d117b30
JH
498 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
499 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
500};
501
5bf0ebab
RH
502static int const x86_64_int_parameter_registers[6] =
503{
504 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
505 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
506};
507
508static int const x86_64_int_return_registers[4] =
509{
510 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
511};
53c17031 512
0f7fa3d0
JH
513/* The "default" register map used in 64bit mode. */
514int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
515{
516 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
3de72741 517 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
0f7fa3d0
JH
518 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
519 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
520 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
521 8,9,10,11,12,13,14,15, /* extended integer registers */
522 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
523};
524
83774849
RH
525/* Define the register numbers to be used in Dwarf debugging information.
526 The SVR4 reference port C compiler uses the following register numbers
527 in its Dwarf output code:
528 0 for %eax (gcc regno = 0)
529 1 for %ecx (gcc regno = 2)
530 2 for %edx (gcc regno = 1)
531 3 for %ebx (gcc regno = 3)
532 4 for %esp (gcc regno = 7)
533 5 for %ebp (gcc regno = 6)
534 6 for %esi (gcc regno = 4)
535 7 for %edi (gcc regno = 5)
536 The following three DWARF register numbers are never generated by
537 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
538 believes these numbers have these meanings.
539 8 for %eip (no gcc equivalent)
540 9 for %eflags (gcc regno = 17)
541 10 for %trapno (no gcc equivalent)
542 It is not at all clear how we should number the FP stack registers
543 for the x86 architecture. If the version of SDB on x86/svr4 were
544 a bit less brain dead with respect to floating-point then we would
545 have a precedent to follow with respect to DWARF register numbers
546 for x86 FP registers, but the SDB on x86/svr4 is so completely
547 broken with respect to FP registers that it is hardly worth thinking
548 of it as something to strive for compatibility with.
549 The version of x86/svr4 SDB I have at the moment does (partially)
550 seem to believe that DWARF register number 11 is associated with
551 the x86 register %st(0), but that's about all. Higher DWARF
552 register numbers don't seem to be associated with anything in
553 particular, and even for DWARF regno 11, SDB only seems to under-
554 stand that it should say that a variable lives in %st(0) (when
555 asked via an `=' command) if we said it was in DWARF regno 11,
556 but SDB still prints garbage when asked for the value of the
557 variable in question (via a `/' command).
558 (Also note that the labels SDB prints for various FP stack regs
559 when doing an `x' command are all wrong.)
560 Note that these problems generally don't affect the native SVR4
561 C compiler because it doesn't allow the use of -O with -g and
562 because when it is *not* optimizing, it allocates a memory
563 location for each floating-point variable, and the memory
564 location is what gets described in the DWARF AT_location
565 attribute for the variable in question.
566 Regardless of the severe mental illness of the x86/svr4 SDB, we
567 do something sensible here and we use the following DWARF
568 register numbers. Note that these are all stack-top-relative
569 numbers.
570 11 for %st(0) (gcc regno = 8)
571 12 for %st(1) (gcc regno = 9)
572 13 for %st(2) (gcc regno = 10)
573 14 for %st(3) (gcc regno = 11)
574 15 for %st(4) (gcc regno = 12)
575 16 for %st(5) (gcc regno = 13)
576 17 for %st(6) (gcc regno = 14)
577 18 for %st(7) (gcc regno = 15)
578*/
0f290768 579int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
580{
581 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
582 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
3f3f2124 583 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
584 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
585 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
3f3f2124
JH
586 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded integer registers */
587 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded SSE registers */
83774849
RH
588};
589
c572e5ba
JVA
590/* Test and compare insns in i386.md store the information needed to
591 generate branch and scc insns here. */
592
07933f72
GS
593rtx ix86_compare_op0 = NULL_RTX;
594rtx ix86_compare_op1 = NULL_RTX;
f5316dfe 595
f996902d
RH
596/* The encoding characters for the four TLS models present in ELF. */
597
755ac5d4 598static char const tls_model_chars[] = " GLil";
f996902d 599
7a2e09f4 600#define MAX_386_STACK_LOCALS 3
8362f420
JH
601/* Size of the register save area. */
602#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
36edd3cc
BS
603
604/* Define the structure for the machine field in struct function. */
e2500fed 605struct machine_function GTY(())
36edd3cc
BS
606{
607 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
f996902d 608 const char *some_ld_name;
8362f420 609 int save_varrargs_registers;
6fca22eb 610 int accesses_prev_frame;
36edd3cc
BS
611};
612
01d939e8 613#define ix86_stack_locals (cfun->machine->stack_locals)
8362f420 614#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
36edd3cc 615
4dd2ac2c
JH
616/* Structure describing stack frame layout.
617 Stack grows downward:
618
619 [arguments]
620 <- ARG_POINTER
621 saved pc
622
623 saved frame pointer if frame_pointer_needed
624 <- HARD_FRAME_POINTER
625 [saved regs]
626
627 [padding1] \
628 )
629 [va_arg registers] (
630 > to_allocate <- FRAME_POINTER
631 [frame] (
632 )
633 [padding2] /
634 */
635struct ix86_frame
636{
637 int nregs;
638 int padding1;
8362f420 639 int va_arg_size;
4dd2ac2c
JH
640 HOST_WIDE_INT frame;
641 int padding2;
642 int outgoing_arguments_size;
8362f420 643 int red_zone_size;
4dd2ac2c
JH
644
645 HOST_WIDE_INT to_allocate;
646 /* The offsets relative to ARG_POINTER. */
647 HOST_WIDE_INT frame_pointer_offset;
648 HOST_WIDE_INT hard_frame_pointer_offset;
649 HOST_WIDE_INT stack_pointer_offset;
650};
651
c93e80a5
JH
652/* Used to enable/disable debugging features. */
653const char *ix86_debug_arg_string, *ix86_debug_addr_string;
6189a572
JH
654/* Code model option as passed by user. */
655const char *ix86_cmodel_string;
656/* Parsed value. */
657enum cmodel ix86_cmodel;
80f33d06
GS
658/* Asm dialect. */
659const char *ix86_asm_string;
660enum asm_dialect ix86_asm_dialect = ASM_ATT;
f996902d
RH
661/* TLS dialext. */
662const char *ix86_tls_dialect_string;
663enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
6189a572 664
5bf0ebab 665/* Which unit we are generating floating point math for. */
965f5423
JH
666enum fpmath_unit ix86_fpmath;
667
5bf0ebab
RH
668/* Which cpu are we scheduling for. */
669enum processor_type ix86_cpu;
670/* Which instruction set architecture to use. */
671enum processor_type ix86_arch;
c8c5cb99
SC
672
673/* Strings to hold which cpu and instruction set architecture to use. */
9c23aa47
ZW
674const char *ix86_cpu_string; /* for -mcpu=<xxx> */
675const char *ix86_arch_string; /* for -march=<xxx> */
965f5423 676const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
c8c5cb99 677
0f290768 678/* # of registers to use to pass arguments. */
e075ae69 679const char *ix86_regparm_string;
e9a25f70 680
f4365627
JH
681/* true if sse prefetch instruction is not NOOP. */
682int x86_prefetch_sse;
683
e075ae69
RH
684/* ix86_regparm_string as a number */
685int ix86_regparm;
e9a25f70
JL
686
687/* Alignment to use for loops and jumps: */
688
0f290768 689/* Power of two alignment for loops. */
e075ae69 690const char *ix86_align_loops_string;
e9a25f70 691
0f290768 692/* Power of two alignment for non-loop jumps. */
e075ae69 693const char *ix86_align_jumps_string;
e9a25f70 694
3af4bd89 695/* Power of two alignment for stack boundary in bytes. */
e075ae69 696const char *ix86_preferred_stack_boundary_string;
3af4bd89
JH
697
698/* Preferred alignment for stack boundary in bits. */
e075ae69 699int ix86_preferred_stack_boundary;
3af4bd89 700
e9a25f70 701/* Values 1-5: see jump.c */
e075ae69
RH
702int ix86_branch_cost;
703const char *ix86_branch_cost_string;
e9a25f70 704
0f290768 705/* Power of two alignment for functions. */
e075ae69 706const char *ix86_align_funcs_string;
623fe810
RH
707
708/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
709static char internal_label_prefix[16];
710static int internal_label_prefix_len;
e075ae69 711\f
623fe810 712static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
f996902d 713static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
f6da8bc3
KG
714static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
715static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
e075ae69 716 int, int, FILE *));
f996902d
RH
717static const char *get_some_local_dynamic_name PARAMS ((void));
718static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
719static rtx maybe_get_pool_constant PARAMS ((rtx));
f6da8bc3 720static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
3a3677ff
RH
721static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
722 rtx *, rtx *));
f996902d 723static rtx get_thread_pointer PARAMS ((void));
145aacc2 724static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
f6da8bc3
KG
725static rtx gen_push PARAMS ((rtx));
726static int memory_address_length PARAMS ((rtx addr));
727static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
728static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
f6da8bc3
KG
729static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
730static void ix86_dump_ppro_packet PARAMS ((FILE *));
731static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
e2500fed 732static struct machine_function * ix86_init_machine_status PARAMS ((void));
2b589241 733static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
b531087a
KH
734static int ix86_nsaved_regs PARAMS ((void));
735static void ix86_emit_save_regs PARAMS ((void));
c6036a37 736static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
37a58036 737static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
bd09bdeb 738static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
0e4970d7 739static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
b531087a 740static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
55efb413 741static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
0945b39d 742static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
0945b39d
JH
743static rtx ix86_expand_aligntest PARAMS ((rtx, int));
744static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
c237e94a
ZW
745static int ix86_issue_rate PARAMS ((void));
746static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
747static void ix86_sched_init PARAMS ((FILE *, int, int));
748static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
749static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
9b690711
RH
750static int ia32_use_dfa_pipeline_interface PARAMS ((void));
751static int ia32_multipass_dfa_lookahead PARAMS ((void));
e37af218 752static void ix86_init_mmx_sse_builtins PARAMS ((void));
e075ae69
RH
753
754struct ix86_address
755{
756 rtx base, index, disp;
757 HOST_WIDE_INT scale;
758};
b08de47e 759
e075ae69 760static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
bd793c65 761
f996902d
RH
762static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
763static const char *ix86_strip_name_encoding PARAMS ((const char *))
764 ATTRIBUTE_UNUSED;
fb49053f 765
bd793c65 766struct builtin_description;
8b60264b
KG
767static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
768 tree, rtx));
769static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
770 tree, rtx));
bd793c65
BS
771static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
772static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
773static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
e37af218 774static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
bd793c65 775static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
c0c102a9
JH
776static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
777static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
778 enum rtx_code *,
779 enum rtx_code *,
780 enum rtx_code *));
9e7adcb3
JH
781static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
782 rtx *, rtx *));
783static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
784static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
785static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
786static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
bd09bdeb 787static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
9b690711 788static int ix86_save_reg PARAMS ((unsigned int, int));
4dd2ac2c 789static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
8d8e52be 790static int ix86_comp_type_attributes PARAMS ((tree, tree));
91d231cb
JM
791const struct attribute_spec ix86_attribute_table[];
792static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
793static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
b069de3b 794static int ix86_value_regno PARAMS ((enum machine_mode));
7c262518 795
21c318ba 796#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
2cc07db4
RH
797static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
798#endif
e56feed6 799
53c17031
JH
800/* Register class used for passing given 64bit part of the argument.
801 These represent classes as documented by the PS ABI, with the exception
802 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
803 use SF or DFmode move instead of DImode to avoid reformating penalties.
804
805 Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves
806 whenever possible (upper half does contain padding).
807 */
808enum x86_64_reg_class
809 {
810 X86_64_NO_CLASS,
811 X86_64_INTEGER_CLASS,
812 X86_64_INTEGERSI_CLASS,
813 X86_64_SSE_CLASS,
814 X86_64_SSESF_CLASS,
815 X86_64_SSEDF_CLASS,
816 X86_64_SSEUP_CLASS,
817 X86_64_X87_CLASS,
818 X86_64_X87UP_CLASS,
819 X86_64_MEMORY_CLASS
820 };
0b5826ac 821static const char * const x86_64_reg_class_name[] =
53c17031
JH
822 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
823
824#define MAX_CLASSES 4
825static int classify_argument PARAMS ((enum machine_mode, tree,
826 enum x86_64_reg_class [MAX_CLASSES],
827 int));
828static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
829 int *));
830static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
07933f72 831 const int *, int));
53c17031
JH
832static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
833 enum x86_64_reg_class));
672a6f42
NB
834\f
835/* Initialize the GCC target structure. */
91d231cb
JM
836#undef TARGET_ATTRIBUTE_TABLE
837#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
672a6f42 838#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
f5f4be42
NB
839# undef TARGET_MERGE_DECL_ATTRIBUTES
840# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
672a6f42
NB
841#endif
842
8d8e52be
JM
843#undef TARGET_COMP_TYPE_ATTRIBUTES
844#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
845
f6155fda
SS
846#undef TARGET_INIT_BUILTINS
847#define TARGET_INIT_BUILTINS ix86_init_builtins
848
849#undef TARGET_EXPAND_BUILTIN
850#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
851
bd09bdeb
RH
852#undef TARGET_ASM_FUNCTION_EPILOGUE
853#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
08c148a8 854
17b53c33
NB
855#undef TARGET_ASM_OPEN_PAREN
856#define TARGET_ASM_OPEN_PAREN ""
857#undef TARGET_ASM_CLOSE_PAREN
858#define TARGET_ASM_CLOSE_PAREN ""
859
301d03af
RS
860#undef TARGET_ASM_ALIGNED_HI_OP
861#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
862#undef TARGET_ASM_ALIGNED_SI_OP
863#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
864#ifdef ASM_QUAD
865#undef TARGET_ASM_ALIGNED_DI_OP
866#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
867#endif
868
869#undef TARGET_ASM_UNALIGNED_HI_OP
870#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
871#undef TARGET_ASM_UNALIGNED_SI_OP
872#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
873#undef TARGET_ASM_UNALIGNED_DI_OP
874#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
875
c237e94a
ZW
876#undef TARGET_SCHED_ADJUST_COST
877#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
878#undef TARGET_SCHED_ISSUE_RATE
879#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
880#undef TARGET_SCHED_VARIABLE_ISSUE
881#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
882#undef TARGET_SCHED_INIT
883#define TARGET_SCHED_INIT ix86_sched_init
884#undef TARGET_SCHED_REORDER
885#define TARGET_SCHED_REORDER ix86_sched_reorder
fce5a9f2 886#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
9b690711
RH
887#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
888 ia32_use_dfa_pipeline_interface
889#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
890#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
891 ia32_multipass_dfa_lookahead
c237e94a 892
f996902d
RH
893#ifdef HAVE_AS_TLS
894#undef TARGET_HAVE_TLS
895#define TARGET_HAVE_TLS true
896#endif
897
f6897b10 898struct gcc_target targetm = TARGET_INITIALIZER;
e075ae69 899\f
f5316dfe
MM
900/* Sometimes certain combinations of command options do not make
901 sense on a particular target machine. You can define a macro
902 `OVERRIDE_OPTIONS' to take account of this. This macro, if
903 defined, is executed once just after all the command options have
904 been parsed.
905
906 Don't use this macro to turn on various extra optimizations for
907 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
908
909void
910override_options ()
911{
400500c4 912 int i;
e075ae69
RH
913 /* Comes from final.c -- no real reason to change it. */
914#define MAX_CODE_ALIGN 16
f5316dfe 915
c8c5cb99
SC
916 static struct ptt
917 {
8b60264b
KG
918 const struct processor_costs *cost; /* Processor costs */
919 const int target_enable; /* Target flags to enable. */
920 const int target_disable; /* Target flags to disable. */
921 const int align_loop; /* Default alignments. */
2cca7283 922 const int align_loop_max_skip;
8b60264b 923 const int align_jump;
2cca7283 924 const int align_jump_max_skip;
8b60264b
KG
925 const int align_func;
926 const int branch_cost;
e075ae69 927 }
0f290768 928 const processor_target_table[PROCESSOR_max] =
e075ae69 929 {
2cca7283
JH
930 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
931 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
932 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
933 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
934 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
935 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
936 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
e075ae69
RH
937 };
938
f4365627 939 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
e075ae69
RH
940 static struct pta
941 {
8b60264b
KG
942 const char *const name; /* processor name or nickname. */
943 const enum processor_type processor;
0dd0e980
JH
944 const enum pta_flags
945 {
946 PTA_SSE = 1,
947 PTA_SSE2 = 2,
948 PTA_MMX = 4,
f4365627 949 PTA_PREFETCH_SSE = 8,
0dd0e980
JH
950 PTA_3DNOW = 16,
951 PTA_3DNOW_A = 64
952 } flags;
e075ae69 953 }
0f290768 954 const processor_alias_table[] =
e075ae69 955 {
0dd0e980
JH
956 {"i386", PROCESSOR_I386, 0},
957 {"i486", PROCESSOR_I486, 0},
958 {"i586", PROCESSOR_PENTIUM, 0},
959 {"pentium", PROCESSOR_PENTIUM, 0},
960 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
3780101d
JG
961 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
962 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
963 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
0dd0e980
JH
964 {"i686", PROCESSOR_PENTIUMPRO, 0},
965 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
966 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
f4365627 967 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
0dd0e980 968 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
f4365627 969 PTA_MMX | PTA_PREFETCH_SSE},
0dd0e980
JH
970 {"k6", PROCESSOR_K6, PTA_MMX},
971 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
972 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
f4365627 973 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 974 | PTA_3DNOW_A},
f4365627 975 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
0dd0e980 976 | PTA_3DNOW | PTA_3DNOW_A},
f4365627 977 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 978 | PTA_3DNOW_A | PTA_SSE},
f4365627 979 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 980 | PTA_3DNOW_A | PTA_SSE},
f4365627 981 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 982 | PTA_3DNOW_A | PTA_SSE},
3af4bd89 983 };
c8c5cb99 984
ca7558fc 985 int const pta_size = ARRAY_SIZE (processor_alias_table);
c8c5cb99 986
3dc85dfb
RH
987 /* By default our XFmode is the 80-bit extended format. If we have
988 use TFmode instead, it's also the 80-bit format, but with padding. */
989 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
990 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
991
f5316dfe
MM
992#ifdef SUBTARGET_OVERRIDE_OPTIONS
993 SUBTARGET_OVERRIDE_OPTIONS;
994#endif
995
f4365627
JH
996 if (!ix86_cpu_string && ix86_arch_string)
997 ix86_cpu_string = ix86_arch_string;
998 if (!ix86_cpu_string)
999 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
1000 if (!ix86_arch_string)
1001 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
e075ae69 1002
6189a572
JH
1003 if (ix86_cmodel_string != 0)
1004 {
1005 if (!strcmp (ix86_cmodel_string, "small"))
1006 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1007 else if (flag_pic)
c725bd79 1008 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
6189a572
JH
1009 else if (!strcmp (ix86_cmodel_string, "32"))
1010 ix86_cmodel = CM_32;
1011 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1012 ix86_cmodel = CM_KERNEL;
1013 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1014 ix86_cmodel = CM_MEDIUM;
1015 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1016 ix86_cmodel = CM_LARGE;
1017 else
1018 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1019 }
1020 else
1021 {
1022 ix86_cmodel = CM_32;
1023 if (TARGET_64BIT)
1024 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1025 }
c93e80a5
JH
1026 if (ix86_asm_string != 0)
1027 {
1028 if (!strcmp (ix86_asm_string, "intel"))
1029 ix86_asm_dialect = ASM_INTEL;
1030 else if (!strcmp (ix86_asm_string, "att"))
1031 ix86_asm_dialect = ASM_ATT;
1032 else
1033 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1034 }
6189a572 1035 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
c725bd79 1036 error ("code model `%s' not supported in the %s bit mode",
6189a572
JH
1037 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1038 if (ix86_cmodel == CM_LARGE)
c725bd79 1039 sorry ("code model `large' not supported yet");
0c2dc519 1040 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
c725bd79 1041 sorry ("%i-bit mode not compiled in",
0c2dc519 1042 (target_flags & MASK_64BIT) ? 64 : 32);
6189a572 1043
f4365627
JH
1044 for (i = 0; i < pta_size; i++)
1045 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1046 {
1047 ix86_arch = processor_alias_table[i].processor;
1048 /* Default cpu tuning to the architecture. */
1049 ix86_cpu = ix86_arch;
1050 if (processor_alias_table[i].flags & PTA_MMX
9ef1b13a 1051 && !(target_flags_explicit & MASK_MMX))
f4365627
JH
1052 target_flags |= MASK_MMX;
1053 if (processor_alias_table[i].flags & PTA_3DNOW
9ef1b13a 1054 && !(target_flags_explicit & MASK_3DNOW))
f4365627
JH
1055 target_flags |= MASK_3DNOW;
1056 if (processor_alias_table[i].flags & PTA_3DNOW_A
9ef1b13a 1057 && !(target_flags_explicit & MASK_3DNOW_A))
f4365627
JH
1058 target_flags |= MASK_3DNOW_A;
1059 if (processor_alias_table[i].flags & PTA_SSE
9ef1b13a 1060 && !(target_flags_explicit & MASK_SSE))
f4365627
JH
1061 target_flags |= MASK_SSE;
1062 if (processor_alias_table[i].flags & PTA_SSE2
9ef1b13a 1063 && !(target_flags_explicit & MASK_SSE2))
f4365627
JH
1064 target_flags |= MASK_SSE2;
1065 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1066 x86_prefetch_sse = true;
1067 break;
1068 }
400500c4 1069
f4365627
JH
1070 if (i == pta_size)
1071 error ("bad value (%s) for -march= switch", ix86_arch_string);
e075ae69 1072
f4365627
JH
1073 for (i = 0; i < pta_size; i++)
1074 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1075 {
1076 ix86_cpu = processor_alias_table[i].processor;
1077 break;
1078 }
1079 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1080 x86_prefetch_sse = true;
1081 if (i == pta_size)
1082 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
e075ae69 1083
2ab0437e
JH
1084 if (optimize_size)
1085 ix86_cost = &size_cost;
1086 else
1087 ix86_cost = processor_target_table[ix86_cpu].cost;
e075ae69
RH
1088 target_flags |= processor_target_table[ix86_cpu].target_enable;
1089 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1090
36edd3cc
BS
1091 /* Arrange to set up i386_stack_locals for all functions. */
1092 init_machine_status = ix86_init_machine_status;
fce5a9f2 1093
0f290768 1094 /* Validate -mregparm= value. */
e075ae69 1095 if (ix86_regparm_string)
b08de47e 1096 {
400500c4
RK
1097 i = atoi (ix86_regparm_string);
1098 if (i < 0 || i > REGPARM_MAX)
1099 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1100 else
1101 ix86_regparm = i;
b08de47e 1102 }
0d7d98ee
JH
1103 else
1104 if (TARGET_64BIT)
1105 ix86_regparm = REGPARM_MAX;
b08de47e 1106
3e18fdf6 1107 /* If the user has provided any of the -malign-* options,
a4f31c00 1108 warn and use that value only if -falign-* is not set.
3e18fdf6 1109 Remove this code in GCC 3.2 or later. */
e075ae69 1110 if (ix86_align_loops_string)
b08de47e 1111 {
3e18fdf6
GK
1112 warning ("-malign-loops is obsolete, use -falign-loops");
1113 if (align_loops == 0)
1114 {
1115 i = atoi (ix86_align_loops_string);
1116 if (i < 0 || i > MAX_CODE_ALIGN)
1117 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1118 else
1119 align_loops = 1 << i;
1120 }
b08de47e 1121 }
3af4bd89 1122
e075ae69 1123 if (ix86_align_jumps_string)
b08de47e 1124 {
3e18fdf6
GK
1125 warning ("-malign-jumps is obsolete, use -falign-jumps");
1126 if (align_jumps == 0)
1127 {
1128 i = atoi (ix86_align_jumps_string);
1129 if (i < 0 || i > MAX_CODE_ALIGN)
1130 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1131 else
1132 align_jumps = 1 << i;
1133 }
b08de47e 1134 }
b08de47e 1135
e075ae69 1136 if (ix86_align_funcs_string)
b08de47e 1137 {
3e18fdf6
GK
1138 warning ("-malign-functions is obsolete, use -falign-functions");
1139 if (align_functions == 0)
1140 {
1141 i = atoi (ix86_align_funcs_string);
1142 if (i < 0 || i > MAX_CODE_ALIGN)
1143 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1144 else
1145 align_functions = 1 << i;
1146 }
b08de47e 1147 }
3af4bd89 1148
3e18fdf6 1149 /* Default align_* from the processor table. */
3e18fdf6 1150 if (align_loops == 0)
2cca7283
JH
1151 {
1152 align_loops = processor_target_table[ix86_cpu].align_loop;
1153 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1154 }
3e18fdf6 1155 if (align_jumps == 0)
2cca7283
JH
1156 {
1157 align_jumps = processor_target_table[ix86_cpu].align_jump;
1158 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1159 }
3e18fdf6 1160 if (align_functions == 0)
2cca7283
JH
1161 {
1162 align_functions = processor_target_table[ix86_cpu].align_func;
1163 }
3e18fdf6 1164
e4c0478d 1165 /* Validate -mpreferred-stack-boundary= value, or provide default.
fbb83b43
AO
1166 The default of 128 bits is for Pentium III's SSE __m128, but we
1167 don't want additional code to keep the stack aligned when
1168 optimizing for code size. */
1169 ix86_preferred_stack_boundary = (optimize_size
ef49d42e 1170 ? TARGET_64BIT ? 128 : 32
fbb83b43 1171 : 128);
e075ae69 1172 if (ix86_preferred_stack_boundary_string)
3af4bd89 1173 {
400500c4 1174 i = atoi (ix86_preferred_stack_boundary_string);
ef49d42e 1175 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
c6257c5d 1176 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
ef49d42e 1177 TARGET_64BIT ? 4 : 2);
400500c4
RK
1178 else
1179 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3af4bd89 1180 }
77a989d1 1181
0f290768 1182 /* Validate -mbranch-cost= value, or provide default. */
e075ae69
RH
1183 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1184 if (ix86_branch_cost_string)
804a8ee0 1185 {
400500c4
RK
1186 i = atoi (ix86_branch_cost_string);
1187 if (i < 0 || i > 5)
1188 error ("-mbranch-cost=%d is not between 0 and 5", i);
1189 else
1190 ix86_branch_cost = i;
804a8ee0 1191 }
804a8ee0 1192
f996902d
RH
1193 if (ix86_tls_dialect_string)
1194 {
1195 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1196 ix86_tls_dialect = TLS_DIALECT_GNU;
1197 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1198 ix86_tls_dialect = TLS_DIALECT_SUN;
1199 else
1200 error ("bad value (%s) for -mtls-dialect= switch",
1201 ix86_tls_dialect_string);
1202 }
1203
db01f480
JH
1204 if (profile_flag)
1205 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1206
e9a25f70
JL
1207 /* Keep nonleaf frame pointers. */
1208 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 1209 flag_omit_frame_pointer = 1;
e075ae69
RH
1210
1211 /* If we're doing fast math, we don't care about comparison order
1212 wrt NaNs. This lets us use a shorter comparison sequence. */
de6c5979 1213 if (flag_unsafe_math_optimizations)
e075ae69
RH
1214 target_flags &= ~MASK_IEEE_FP;
1215
30c99a84
RH
1216 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1217 since the insns won't need emulation. */
1218 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1219 target_flags &= ~MASK_NO_FANCY_MATH_387;
1220
14f73b5a
JH
1221 if (TARGET_64BIT)
1222 {
1223 if (TARGET_ALIGN_DOUBLE)
c725bd79 1224 error ("-malign-double makes no sense in the 64bit mode");
14f73b5a 1225 if (TARGET_RTD)
c725bd79 1226 error ("-mrtd calling convention not supported in the 64bit mode");
14f73b5a 1227 /* Enable by default the SSE and MMX builtins. */
965f5423
JH
1228 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1229 ix86_fpmath = FPMATH_SSE;
14f73b5a 1230 }
965f5423
JH
1231 else
1232 ix86_fpmath = FPMATH_387;
1233
1234 if (ix86_fpmath_string != 0)
1235 {
1236 if (! strcmp (ix86_fpmath_string, "387"))
1237 ix86_fpmath = FPMATH_387;
1238 else if (! strcmp (ix86_fpmath_string, "sse"))
1239 {
1240 if (!TARGET_SSE)
1241 {
1242 warning ("SSE instruction set disabled, using 387 arithmetics");
1243 ix86_fpmath = FPMATH_387;
1244 }
1245 else
1246 ix86_fpmath = FPMATH_SSE;
1247 }
1248 else if (! strcmp (ix86_fpmath_string, "387,sse")
1249 || ! strcmp (ix86_fpmath_string, "sse,387"))
1250 {
1251 if (!TARGET_SSE)
1252 {
1253 warning ("SSE instruction set disabled, using 387 arithmetics");
1254 ix86_fpmath = FPMATH_387;
1255 }
1256 else if (!TARGET_80387)
1257 {
1258 warning ("387 instruction set disabled, using SSE arithmetics");
1259 ix86_fpmath = FPMATH_SSE;
1260 }
1261 else
1262 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1263 }
fce5a9f2 1264 else
965f5423
JH
1265 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1266 }
14f73b5a 1267
a7180f70
BS
1268 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1269 on by -msse. */
1270 if (TARGET_SSE)
e37af218
RH
1271 {
1272 target_flags |= MASK_MMX;
1273 x86_prefetch_sse = true;
1274 }
c6036a37 1275
47f339cf
BS
1276 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1277 if (TARGET_3DNOW)
1278 {
1279 target_flags |= MASK_MMX;
1280 /* If we are targetting the Athlon architecture, enable the 3Dnow/MMX
1281 extensions it adds. */
1282 if (x86_3dnow_a & (1 << ix86_arch))
1283 target_flags |= MASK_3DNOW_A;
1284 }
c6036a37 1285 if ((x86_accumulate_outgoing_args & CPUMASK)
9ef1b13a 1286 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
c6036a37
JH
1287 && !optimize_size)
1288 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
623fe810
RH
1289
1290 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1291 {
1292 char *p;
1293 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1294 p = strchr (internal_label_prefix, 'X');
1295 internal_label_prefix_len = p - internal_label_prefix;
1296 *p = '\0';
1297 }
f5316dfe
MM
1298}
1299\f
32b5b1aa 1300void
c6aded7c 1301optimization_options (level, size)
32b5b1aa 1302 int level;
bb5177ac 1303 int size ATTRIBUTE_UNUSED;
32b5b1aa 1304{
e9a25f70
JL
1305 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1306 make the problem with not enough registers even worse. */
32b5b1aa
SC
1307#ifdef INSN_SCHEDULING
1308 if (level > 1)
1309 flag_schedule_insns = 0;
1310#endif
53c17031
JH
1311 if (TARGET_64BIT && optimize >= 1)
1312 flag_omit_frame_pointer = 1;
1313 if (TARGET_64BIT)
b932f770
JH
1314 {
1315 flag_pcc_struct_return = 0;
1316 flag_asynchronous_unwind_tables = 1;
1317 }
db01f480
JH
1318 if (profile_flag)
1319 flag_omit_frame_pointer = 0;
32b5b1aa 1320}
b08de47e 1321\f
91d231cb
JM
1322/* Table of valid machine attributes. */
1323const struct attribute_spec ix86_attribute_table[] =
b08de47e 1324{
91d231cb 1325 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
b08de47e
MM
1326 /* Stdcall attribute says callee is responsible for popping arguments
1327 if they are not variable. */
91d231cb
JM
1328 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1329 /* Cdecl attribute says the callee is a normal C declaration */
1330 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
b08de47e 1331 /* Regparm attribute specifies how many integer arguments are to be
0f290768 1332 passed in registers. */
91d231cb
JM
1333 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1334#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
3da1eb0b
DS
1335 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1336 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1337 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
91d231cb
JM
1338#endif
1339 { NULL, 0, 0, false, false, false, NULL }
1340};
1341
1342/* Handle a "cdecl" or "stdcall" attribute;
1343 arguments as in struct attribute_spec.handler. */
1344static tree
1345ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1346 tree *node;
1347 tree name;
1348 tree args ATTRIBUTE_UNUSED;
1349 int flags ATTRIBUTE_UNUSED;
1350 bool *no_add_attrs;
1351{
1352 if (TREE_CODE (*node) != FUNCTION_TYPE
1353 && TREE_CODE (*node) != METHOD_TYPE
1354 && TREE_CODE (*node) != FIELD_DECL
1355 && TREE_CODE (*node) != TYPE_DECL)
b08de47e 1356 {
91d231cb
JM
1357 warning ("`%s' attribute only applies to functions",
1358 IDENTIFIER_POINTER (name));
1359 *no_add_attrs = true;
1360 }
b08de47e 1361
91d231cb
JM
1362 if (TARGET_64BIT)
1363 {
1364 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1365 *no_add_attrs = true;
1366 }
b08de47e 1367
91d231cb
JM
1368 return NULL_TREE;
1369}
b08de47e 1370
91d231cb
JM
1371/* Handle a "regparm" attribute;
1372 arguments as in struct attribute_spec.handler. */
1373static tree
1374ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1375 tree *node;
1376 tree name;
1377 tree args;
1378 int flags ATTRIBUTE_UNUSED;
1379 bool *no_add_attrs;
1380{
1381 if (TREE_CODE (*node) != FUNCTION_TYPE
1382 && TREE_CODE (*node) != METHOD_TYPE
1383 && TREE_CODE (*node) != FIELD_DECL
1384 && TREE_CODE (*node) != TYPE_DECL)
1385 {
1386 warning ("`%s' attribute only applies to functions",
1387 IDENTIFIER_POINTER (name));
1388 *no_add_attrs = true;
1389 }
1390 else
1391 {
1392 tree cst;
b08de47e 1393
91d231cb
JM
1394 cst = TREE_VALUE (args);
1395 if (TREE_CODE (cst) != INTEGER_CST)
1396 {
1397 warning ("`%s' attribute requires an integer constant argument",
1398 IDENTIFIER_POINTER (name));
1399 *no_add_attrs = true;
1400 }
1401 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1402 {
1403 warning ("argument to `%s' attribute larger than %d",
1404 IDENTIFIER_POINTER (name), REGPARM_MAX);
1405 *no_add_attrs = true;
1406 }
b08de47e
MM
1407 }
1408
91d231cb 1409 return NULL_TREE;
b08de47e
MM
1410}
1411
1412/* Return 0 if the attributes for two types are incompatible, 1 if they
1413 are compatible, and 2 if they are nearly compatible (which causes a
1414 warning to be generated). */
1415
8d8e52be 1416static int
e075ae69 1417ix86_comp_type_attributes (type1, type2)
afcfe58c
MM
1418 tree type1;
1419 tree type2;
b08de47e 1420{
0f290768 1421 /* Check for mismatch of non-default calling convention. */
27c38fbe 1422 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
1423
1424 if (TREE_CODE (type1) != FUNCTION_TYPE)
1425 return 1;
1426
1427 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
1428 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1429 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
afcfe58c 1430 return 0;
b08de47e
MM
1431 return 1;
1432}
b08de47e
MM
1433\f
1434/* Value is the number of bytes of arguments automatically
1435 popped when returning from a subroutine call.
1436 FUNDECL is the declaration node of the function (as a tree),
1437 FUNTYPE is the data type of the function (as a tree),
1438 or for a library call it is an identifier node for the subroutine name.
1439 SIZE is the number of bytes of arguments passed on the stack.
1440
1441 On the 80386, the RTD insn may be used to pop them if the number
1442 of args is fixed, but if the number is variable then the caller
1443 must pop them all. RTD can't be used for library calls now
1444 because the library is compiled with the Unix compiler.
1445 Use of RTD is a selectable option, since it is incompatible with
1446 standard Unix calling sequences. If the option is not selected,
1447 the caller must always pop the args.
1448
1449 The attribute stdcall is equivalent to RTD on a per module basis. */
1450
1451int
e075ae69 1452ix86_return_pops_args (fundecl, funtype, size)
b08de47e
MM
1453 tree fundecl;
1454 tree funtype;
1455 int size;
79325812 1456{
3345ee7d 1457 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 1458
0f290768 1459 /* Cdecl functions override -mrtd, and never pop the stack. */
e9a25f70 1460 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 1461
0f290768 1462 /* Stdcall functions will pop the stack if not variable args. */
698cdd84
SC
1463 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1464 rtd = 1;
79325812 1465
698cdd84
SC
1466 if (rtd
1467 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
e9a25f70
JL
1468 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1469 == void_type_node)))
698cdd84
SC
1470 return size;
1471 }
79325812 1472
232b8f52 1473 /* Lose any fake structure return argument if it is passed on the stack. */
0d7d98ee
JH
1474 if (aggregate_value_p (TREE_TYPE (funtype))
1475 && !TARGET_64BIT)
232b8f52
JJ
1476 {
1477 int nregs = ix86_regparm;
79325812 1478
232b8f52
JJ
1479 if (funtype)
1480 {
1481 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
1482
1483 if (attr)
1484 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1485 }
1486
1487 if (!nregs)
1488 return GET_MODE_SIZE (Pmode);
1489 }
1490
1491 return 0;
b08de47e 1492}
b08de47e
MM
1493\f
1494/* Argument support functions. */
1495
53c17031
JH
1496/* Return true when register may be used to pass function parameters. */
1497bool
1498ix86_function_arg_regno_p (regno)
1499 int regno;
1500{
1501 int i;
1502 if (!TARGET_64BIT)
0333394e
JJ
1503 return (regno < REGPARM_MAX
1504 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
53c17031
JH
1505 if (SSE_REGNO_P (regno) && TARGET_SSE)
1506 return true;
1507 /* RAX is used as hidden argument to va_arg functions. */
1508 if (!regno)
1509 return true;
1510 for (i = 0; i < REGPARM_MAX; i++)
1511 if (regno == x86_64_int_parameter_registers[i])
1512 return true;
1513 return false;
1514}
1515
b08de47e
MM
1516/* Initialize a variable CUM of type CUMULATIVE_ARGS
1517 for a call to a function whose data type is FNTYPE.
1518 For a library call, FNTYPE is 0. */
1519
1520void
1521init_cumulative_args (cum, fntype, libname)
e9a25f70 1522 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
b08de47e
MM
1523 tree fntype; /* tree ptr for function decl */
1524 rtx libname; /* SYMBOL_REF of library name or 0 */
1525{
1526 static CUMULATIVE_ARGS zero_cum;
1527 tree param, next_param;
1528
1529 if (TARGET_DEBUG_ARG)
1530 {
1531 fprintf (stderr, "\ninit_cumulative_args (");
1532 if (fntype)
e9a25f70
JL
1533 fprintf (stderr, "fntype code = %s, ret code = %s",
1534 tree_code_name[(int) TREE_CODE (fntype)],
1535 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
b08de47e
MM
1536 else
1537 fprintf (stderr, "no fntype");
1538
1539 if (libname)
1540 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1541 }
1542
1543 *cum = zero_cum;
1544
1545 /* Set up the number of registers to use for passing arguments. */
e075ae69 1546 cum->nregs = ix86_regparm;
53c17031
JH
1547 cum->sse_nregs = SSE_REGPARM_MAX;
1548 if (fntype && !TARGET_64BIT)
b08de47e
MM
1549 {
1550 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
e9a25f70 1551
b08de47e
MM
1552 if (attr)
1553 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1554 }
53c17031 1555 cum->maybe_vaarg = false;
b08de47e
MM
1556
1557 /* Determine if this function has variable arguments. This is
1558 indicated by the last argument being 'void_type_mode' if there
1559 are no variable arguments. If there are variable arguments, then
1560 we won't pass anything in registers */
1561
1562 if (cum->nregs)
1563 {
1564 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 1565 param != 0; param = next_param)
b08de47e
MM
1566 {
1567 next_param = TREE_CHAIN (param);
e9a25f70 1568 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
53c17031
JH
1569 {
1570 if (!TARGET_64BIT)
1571 cum->nregs = 0;
1572 cum->maybe_vaarg = true;
1573 }
b08de47e
MM
1574 }
1575 }
53c17031
JH
1576 if ((!fntype && !libname)
1577 || (fntype && !TYPE_ARG_TYPES (fntype)))
1578 cum->maybe_vaarg = 1;
b08de47e
MM
1579
1580 if (TARGET_DEBUG_ARG)
1581 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1582
1583 return;
1584}
1585
/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */
1589
1590/* Return the union class of CLASS1 and CLASS2.
1591 See the x86-64 PS ABI for details. */
1592
1593static enum x86_64_reg_class
1594merge_classes (class1, class2)
1595 enum x86_64_reg_class class1, class2;
1596{
1597 /* Rule #1: If both classes are equal, this is the resulting class. */
1598 if (class1 == class2)
1599 return class1;
1600
1601 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1602 the other class. */
1603 if (class1 == X86_64_NO_CLASS)
1604 return class2;
1605 if (class2 == X86_64_NO_CLASS)
1606 return class1;
1607
1608 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1609 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1610 return X86_64_MEMORY_CLASS;
1611
1612 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1613 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1614 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1615 return X86_64_INTEGERSI_CLASS;
1616 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1617 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1618 return X86_64_INTEGER_CLASS;
1619
1620 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1621 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1622 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1623 return X86_64_MEMORY_CLASS;
1624
1625 /* Rule #6: Otherwise class SSE is used. */
1626 return X86_64_SSE_CLASS;
1627}
1628
1629/* Classify the argument of type TYPE and mode MODE.
1630 CLASSES will be filled by the register class used to pass each word
1631 of the operand. The number of words is returned. In case the parameter
1632 should be passed in memory, 0 is returned. As a special case for zero
1633 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1634
1635 BIT_OFFSET is used internally for handling records and specifies offset
1636 of the offset in bits modulo 256 to avoid overflow cases.
1637
1638 See the x86-64 PS ABI for details.
1639*/
1640
1641static int
1642classify_argument (mode, type, classes, bit_offset)
1643 enum machine_mode mode;
1644 tree type;
1645 enum x86_64_reg_class classes[MAX_CLASSES];
1646 int bit_offset;
1647{
1648 int bytes =
1649 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
23327dae 1650 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
53c17031 1651
c60ee6f5
JH
1652 /* Variable sized entities are always passed/returned in memory. */
1653 if (bytes < 0)
1654 return 0;
1655
53c17031
JH
1656 if (type && AGGREGATE_TYPE_P (type))
1657 {
1658 int i;
1659 tree field;
1660 enum x86_64_reg_class subclasses[MAX_CLASSES];
1661
1662 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1663 if (bytes > 16)
1664 return 0;
1665
1666 for (i = 0; i < words; i++)
1667 classes[i] = X86_64_NO_CLASS;
1668
1669 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1670 signalize memory class, so handle it as special case. */
1671 if (!words)
1672 {
1673 classes[0] = X86_64_NO_CLASS;
1674 return 1;
1675 }
1676
1677 /* Classify each field of record and merge classes. */
1678 if (TREE_CODE (type) == RECORD_TYPE)
1679 {
91ea38f9
JH
1680 /* For classes first merge in the field of the subclasses. */
1681 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1682 {
1683 tree bases = TYPE_BINFO_BASETYPES (type);
1684 int n_bases = TREE_VEC_LENGTH (bases);
1685 int i;
1686
1687 for (i = 0; i < n_bases; ++i)
1688 {
1689 tree binfo = TREE_VEC_ELT (bases, i);
1690 int num;
1691 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1692 tree type = BINFO_TYPE (binfo);
1693
1694 num = classify_argument (TYPE_MODE (type),
1695 type, subclasses,
1696 (offset + bit_offset) % 256);
1697 if (!num)
1698 return 0;
1699 for (i = 0; i < num; i++)
1700 {
db01f480 1701 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
1702 classes[i + pos] =
1703 merge_classes (subclasses[i], classes[i + pos]);
1704 }
1705 }
1706 }
1707 /* And now merge the fields of structure. */
53c17031
JH
1708 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1709 {
1710 if (TREE_CODE (field) == FIELD_DECL)
1711 {
1712 int num;
1713
1714 /* Bitfields are always classified as integer. Handle them
1715 early, since later code would consider them to be
1716 misaligned integers. */
1717 if (DECL_BIT_FIELD (field))
1718 {
1719 for (i = int_bit_position (field) / 8 / 8;
1720 i < (int_bit_position (field)
1721 + tree_low_cst (DECL_SIZE (field), 0)
1722 + 63) / 8 / 8; i++)
1723 classes[i] =
1724 merge_classes (X86_64_INTEGER_CLASS,
1725 classes[i]);
1726 }
1727 else
1728 {
1729 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1730 TREE_TYPE (field), subclasses,
1731 (int_bit_position (field)
1732 + bit_offset) % 256);
1733 if (!num)
1734 return 0;
1735 for (i = 0; i < num; i++)
1736 {
1737 int pos =
db01f480 1738 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
53c17031
JH
1739 classes[i + pos] =
1740 merge_classes (subclasses[i], classes[i + pos]);
1741 }
1742 }
1743 }
1744 }
1745 }
1746 /* Arrays are handled as small records. */
1747 else if (TREE_CODE (type) == ARRAY_TYPE)
1748 {
1749 int num;
1750 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1751 TREE_TYPE (type), subclasses, bit_offset);
1752 if (!num)
1753 return 0;
1754
1755 /* The partial classes are now full classes. */
1756 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1757 subclasses[0] = X86_64_SSE_CLASS;
1758 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1759 subclasses[0] = X86_64_INTEGER_CLASS;
1760
1761 for (i = 0; i < words; i++)
1762 classes[i] = subclasses[i % num];
1763 }
1764 /* Unions are similar to RECORD_TYPE but offset is always 0. */
e4dbaed5
AS
1765 else if (TREE_CODE (type) == UNION_TYPE
1766 || TREE_CODE (type) == QUAL_UNION_TYPE)
53c17031 1767 {
91ea38f9
JH
1768 /* For classes first merge in the field of the subclasses. */
1769 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1770 {
1771 tree bases = TYPE_BINFO_BASETYPES (type);
1772 int n_bases = TREE_VEC_LENGTH (bases);
1773 int i;
1774
1775 for (i = 0; i < n_bases; ++i)
1776 {
1777 tree binfo = TREE_VEC_ELT (bases, i);
1778 int num;
1779 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1780 tree type = BINFO_TYPE (binfo);
1781
1782 num = classify_argument (TYPE_MODE (type),
1783 type, subclasses,
db01f480 1784 (offset + (bit_offset % 64)) % 256);
91ea38f9
JH
1785 if (!num)
1786 return 0;
1787 for (i = 0; i < num; i++)
1788 {
c16576e6 1789 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
1790 classes[i + pos] =
1791 merge_classes (subclasses[i], classes[i + pos]);
1792 }
1793 }
1794 }
53c17031
JH
1795 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1796 {
1797 if (TREE_CODE (field) == FIELD_DECL)
1798 {
1799 int num;
1800 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1801 TREE_TYPE (field), subclasses,
1802 bit_offset);
1803 if (!num)
1804 return 0;
1805 for (i = 0; i < num; i++)
1806 classes[i] = merge_classes (subclasses[i], classes[i]);
1807 }
1808 }
1809 }
1810 else
1811 abort ();
1812
1813 /* Final merger cleanup. */
1814 for (i = 0; i < words; i++)
1815 {
1816 /* If one class is MEMORY, everything should be passed in
1817 memory. */
1818 if (classes[i] == X86_64_MEMORY_CLASS)
1819 return 0;
1820
d6a7951f 1821 /* The X86_64_SSEUP_CLASS should be always preceded by
53c17031
JH
1822 X86_64_SSE_CLASS. */
1823 if (classes[i] == X86_64_SSEUP_CLASS
1824 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1825 classes[i] = X86_64_SSE_CLASS;
1826
d6a7951f 1827 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
53c17031
JH
1828 if (classes[i] == X86_64_X87UP_CLASS
1829 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1830 classes[i] = X86_64_SSE_CLASS;
1831 }
1832 return words;
1833 }
1834
1835 /* Compute alignment needed. We align all types to natural boundaries with
1836 exception of XFmode that is aligned to 64bits. */
1837 if (mode != VOIDmode && mode != BLKmode)
1838 {
1839 int mode_alignment = GET_MODE_BITSIZE (mode);
1840
1841 if (mode == XFmode)
1842 mode_alignment = 128;
1843 else if (mode == XCmode)
1844 mode_alignment = 256;
f5143c46 1845 /* Misaligned fields are always returned in memory. */
53c17031
JH
1846 if (bit_offset % mode_alignment)
1847 return 0;
1848 }
1849
1850 /* Classification of atomic types. */
1851 switch (mode)
1852 {
1853 case DImode:
1854 case SImode:
1855 case HImode:
1856 case QImode:
1857 case CSImode:
1858 case CHImode:
1859 case CQImode:
1860 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1861 classes[0] = X86_64_INTEGERSI_CLASS;
1862 else
1863 classes[0] = X86_64_INTEGER_CLASS;
1864 return 1;
1865 case CDImode:
1866 case TImode:
1867 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1868 return 2;
1869 case CTImode:
1870 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1871 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1872 return 4;
1873 case SFmode:
1874 if (!(bit_offset % 64))
1875 classes[0] = X86_64_SSESF_CLASS;
1876 else
1877 classes[0] = X86_64_SSE_CLASS;
1878 return 1;
1879 case DFmode:
1880 classes[0] = X86_64_SSEDF_CLASS;
1881 return 1;
1882 case TFmode:
1883 classes[0] = X86_64_X87_CLASS;
1884 classes[1] = X86_64_X87UP_CLASS;
1885 return 2;
1886 case TCmode:
1887 classes[0] = X86_64_X87_CLASS;
1888 classes[1] = X86_64_X87UP_CLASS;
1889 classes[2] = X86_64_X87_CLASS;
1890 classes[3] = X86_64_X87UP_CLASS;
1891 return 4;
1892 case DCmode:
1893 classes[0] = X86_64_SSEDF_CLASS;
1894 classes[1] = X86_64_SSEDF_CLASS;
1895 return 2;
1896 case SCmode:
1897 classes[0] = X86_64_SSE_CLASS;
1898 return 1;
e95d6b23
JH
1899 case V4SFmode:
1900 case V4SImode:
495333a6
JH
1901 case V16QImode:
1902 case V8HImode:
1903 case V2DFmode:
1904 case V2DImode:
e95d6b23
JH
1905 classes[0] = X86_64_SSE_CLASS;
1906 classes[1] = X86_64_SSEUP_CLASS;
1907 return 2;
1908 case V2SFmode:
1909 case V2SImode:
1910 case V4HImode:
1911 case V8QImode:
1912 classes[0] = X86_64_SSE_CLASS;
1913 return 1;
53c17031 1914 case BLKmode:
e95d6b23 1915 case VOIDmode:
53c17031
JH
1916 return 0;
1917 default:
1918 abort ();
1919 }
1920}
1921
1922/* Examine the argument and return set number of register required in each
f5143c46 1923 class. Return 0 iff parameter should be passed in memory. */
53c17031
JH
1924static int
1925examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1926 enum machine_mode mode;
1927 tree type;
1928 int *int_nregs, *sse_nregs;
1929 int in_return;
1930{
1931 enum x86_64_reg_class class[MAX_CLASSES];
1932 int n = classify_argument (mode, type, class, 0);
1933
1934 *int_nregs = 0;
1935 *sse_nregs = 0;
1936 if (!n)
1937 return 0;
1938 for (n--; n >= 0; n--)
1939 switch (class[n])
1940 {
1941 case X86_64_INTEGER_CLASS:
1942 case X86_64_INTEGERSI_CLASS:
1943 (*int_nregs)++;
1944 break;
1945 case X86_64_SSE_CLASS:
1946 case X86_64_SSESF_CLASS:
1947 case X86_64_SSEDF_CLASS:
1948 (*sse_nregs)++;
1949 break;
1950 case X86_64_NO_CLASS:
1951 case X86_64_SSEUP_CLASS:
1952 break;
1953 case X86_64_X87_CLASS:
1954 case X86_64_X87UP_CLASS:
1955 if (!in_return)
1956 return 0;
1957 break;
1958 case X86_64_MEMORY_CLASS:
1959 abort ();
1960 }
1961 return 1;
1962}
1963/* Construct container for the argument used by GCC interface. See
1964 FUNCTION_ARG for the detailed description. */
1965static rtx
1966construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1967 enum machine_mode mode;
1968 tree type;
1969 int in_return;
1970 int nintregs, nsseregs;
07933f72
GS
1971 const int * intreg;
1972 int sse_regno;
53c17031
JH
1973{
1974 enum machine_mode tmpmode;
1975 int bytes =
1976 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1977 enum x86_64_reg_class class[MAX_CLASSES];
1978 int n;
1979 int i;
1980 int nexps = 0;
1981 int needed_sseregs, needed_intregs;
1982 rtx exp[MAX_CLASSES];
1983 rtx ret;
1984
1985 n = classify_argument (mode, type, class, 0);
1986 if (TARGET_DEBUG_ARG)
1987 {
1988 if (!n)
1989 fprintf (stderr, "Memory class\n");
1990 else
1991 {
1992 fprintf (stderr, "Classes:");
1993 for (i = 0; i < n; i++)
1994 {
1995 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1996 }
1997 fprintf (stderr, "\n");
1998 }
1999 }
2000 if (!n)
2001 return NULL;
2002 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2003 return NULL;
2004 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2005 return NULL;
2006
2007 /* First construct simple cases. Avoid SCmode, since we want to use
2008 single register to pass this type. */
2009 if (n == 1 && mode != SCmode)
2010 switch (class[0])
2011 {
2012 case X86_64_INTEGER_CLASS:
2013 case X86_64_INTEGERSI_CLASS:
2014 return gen_rtx_REG (mode, intreg[0]);
2015 case X86_64_SSE_CLASS:
2016 case X86_64_SSESF_CLASS:
2017 case X86_64_SSEDF_CLASS:
2018 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2019 case X86_64_X87_CLASS:
2020 return gen_rtx_REG (mode, FIRST_STACK_REG);
2021 case X86_64_NO_CLASS:
2022 /* Zero sized array, struct or class. */
2023 return NULL;
2024 default:
2025 abort ();
2026 }
2027 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
e95d6b23 2028 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
53c17031
JH
2029 if (n == 2
2030 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2031 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2032 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2033 && class[1] == X86_64_INTEGER_CLASS
2034 && (mode == CDImode || mode == TImode)
2035 && intreg[0] + 1 == intreg[1])
2036 return gen_rtx_REG (mode, intreg[0]);
2037 if (n == 4
2038 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2039 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2040 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2041
2042 /* Otherwise figure out the entries of the PARALLEL. */
2043 for (i = 0; i < n; i++)
2044 {
2045 switch (class[i])
2046 {
2047 case X86_64_NO_CLASS:
2048 break;
2049 case X86_64_INTEGER_CLASS:
2050 case X86_64_INTEGERSI_CLASS:
2051 /* Merge TImodes on aligned occassions here too. */
2052 if (i * 8 + 8 > bytes)
2053 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2054 else if (class[i] == X86_64_INTEGERSI_CLASS)
2055 tmpmode = SImode;
2056 else
2057 tmpmode = DImode;
2058 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2059 if (tmpmode == BLKmode)
2060 tmpmode = DImode;
2061 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2062 gen_rtx_REG (tmpmode, *intreg),
2063 GEN_INT (i*8));
2064 intreg++;
2065 break;
2066 case X86_64_SSESF_CLASS:
2067 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2068 gen_rtx_REG (SFmode,
2069 SSE_REGNO (sse_regno)),
2070 GEN_INT (i*8));
2071 sse_regno++;
2072 break;
2073 case X86_64_SSEDF_CLASS:
2074 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2075 gen_rtx_REG (DFmode,
2076 SSE_REGNO (sse_regno)),
2077 GEN_INT (i*8));
2078 sse_regno++;
2079 break;
2080 case X86_64_SSE_CLASS:
2081 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
2082 tmpmode = TImode, i++;
2083 else
2084 tmpmode = DImode;
2085 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2086 gen_rtx_REG (tmpmode,
2087 SSE_REGNO (sse_regno)),
2088 GEN_INT (i*8));
2089 sse_regno++;
2090 break;
2091 default:
2092 abort ();
2093 }
2094 }
2095 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2096 for (i = 0; i < nexps; i++)
2097 XVECEXP (ret, 0, i) = exp [i];
2098 return ret;
2099}
2100
b08de47e
MM
2101/* Update the data in CUM to advance over an argument
2102 of mode MODE and data type TYPE.
2103 (TYPE is null for libcalls where that information may not be available.) */
2104
2105void
2106function_arg_advance (cum, mode, type, named)
2107 CUMULATIVE_ARGS *cum; /* current arg information */
2108 enum machine_mode mode; /* current arg mode */
2109 tree type; /* type of the argument or 0 if lib support */
2110 int named; /* whether or not the argument was named */
2111{
5ac9118e
KG
2112 int bytes =
2113 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2114 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2115
2116 if (TARGET_DEBUG_ARG)
2117 fprintf (stderr,
e9a25f70 2118 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
b08de47e 2119 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
53c17031 2120 if (TARGET_64BIT)
b08de47e 2121 {
53c17031
JH
2122 int int_nregs, sse_nregs;
2123 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2124 cum->words += words;
2125 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
82a127a9 2126 {
53c17031
JH
2127 cum->nregs -= int_nregs;
2128 cum->sse_nregs -= sse_nregs;
2129 cum->regno += int_nregs;
2130 cum->sse_regno += sse_nregs;
82a127a9 2131 }
53c17031
JH
2132 else
2133 cum->words += words;
b08de47e 2134 }
a4f31c00 2135 else
82a127a9 2136 {
53c17031
JH
2137 if (TARGET_SSE && mode == TImode)
2138 {
2139 cum->sse_words += words;
2140 cum->sse_nregs -= 1;
2141 cum->sse_regno += 1;
2142 if (cum->sse_nregs <= 0)
2143 {
2144 cum->sse_nregs = 0;
2145 cum->sse_regno = 0;
2146 }
2147 }
2148 else
82a127a9 2149 {
53c17031
JH
2150 cum->words += words;
2151 cum->nregs -= words;
2152 cum->regno += words;
2153
2154 if (cum->nregs <= 0)
2155 {
2156 cum->nregs = 0;
2157 cum->regno = 0;
2158 }
82a127a9
CM
2159 }
2160 }
b08de47e
MM
2161 return;
2162}
2163
2164/* Define where to put the arguments to a function.
2165 Value is zero to push the argument on the stack,
2166 or a hard register in which to store the argument.
2167
2168 MODE is the argument's machine mode.
2169 TYPE is the data type of the argument (as a tree).
2170 This is null for libcalls where that information may
2171 not be available.
2172 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2173 the preceding args and about the function being called.
2174 NAMED is nonzero if this argument is a named parameter
2175 (otherwise it is an extra parameter matching an ellipsis). */
2176
07933f72 2177rtx
b08de47e
MM
2178function_arg (cum, mode, type, named)
2179 CUMULATIVE_ARGS *cum; /* current arg information */
2180 enum machine_mode mode; /* current arg mode */
2181 tree type; /* type of the argument or 0 if lib support */
2182 int named; /* != 0 for normal args, == 0 for ... args */
2183{
2184 rtx ret = NULL_RTX;
5ac9118e
KG
2185 int bytes =
2186 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2187 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2188
53c17031
JH
2189 /* Handle an hidden AL argument containing number of registers for varargs
2190 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2191 any AL settings. */
32ee7d1d 2192 if (mode == VOIDmode)
b08de47e 2193 {
53c17031
JH
2194 if (TARGET_64BIT)
2195 return GEN_INT (cum->maybe_vaarg
2196 ? (cum->sse_nregs < 0
2197 ? SSE_REGPARM_MAX
2198 : cum->sse_regno)
2199 : -1);
2200 else
2201 return constm1_rtx;
b08de47e 2202 }
53c17031
JH
2203 if (TARGET_64BIT)
2204 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2205 &x86_64_int_parameter_registers [cum->regno],
2206 cum->sse_regno);
2207 else
2208 switch (mode)
2209 {
2210 /* For now, pass fp/complex values on the stack. */
2211 default:
2212 break;
2213
2214 case BLKmode:
2215 case DImode:
2216 case SImode:
2217 case HImode:
2218 case QImode:
2219 if (words <= cum->nregs)
2220 ret = gen_rtx_REG (mode, cum->regno);
2221 break;
2222 case TImode:
2223 if (cum->sse_nregs)
2224 ret = gen_rtx_REG (mode, cum->sse_regno);
2225 break;
2226 }
b08de47e
MM
2227
2228 if (TARGET_DEBUG_ARG)
2229 {
2230 fprintf (stderr,
91ea38f9 2231 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
b08de47e
MM
2232 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2233
2234 if (ret)
91ea38f9 2235 print_simple_rtl (stderr, ret);
b08de47e
MM
2236 else
2237 fprintf (stderr, ", stack");
2238
2239 fprintf (stderr, " )\n");
2240 }
2241
2242 return ret;
2243}
53c17031
JH
2244
2245/* Gives the alignment boundary, in bits, of an argument with the specified mode
2246 and type. */
2247
2248int
2249ix86_function_arg_boundary (mode, type)
2250 enum machine_mode mode;
2251 tree type;
2252{
2253 int align;
2254 if (!TARGET_64BIT)
2255 return PARM_BOUNDARY;
2256 if (type)
2257 align = TYPE_ALIGN (type);
2258 else
2259 align = GET_MODE_ALIGNMENT (mode);
2260 if (align < PARM_BOUNDARY)
2261 align = PARM_BOUNDARY;
2262 if (align > 128)
2263 align = 128;
2264 return align;
2265}
2266
2267/* Return true if N is a possible register number of function value. */
2268bool
2269ix86_function_value_regno_p (regno)
2270 int regno;
2271{
2272 if (!TARGET_64BIT)
2273 {
2274 return ((regno) == 0
2275 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2276 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2277 }
2278 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2279 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2280 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2281}
2282
2283/* Define how to find the value returned by a function.
2284 VALTYPE is the data type of the value (as a tree).
2285 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2286 otherwise, FUNC is 0. */
2287rtx
2288ix86_function_value (valtype)
2289 tree valtype;
2290{
2291 if (TARGET_64BIT)
2292 {
2293 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2294 REGPARM_MAX, SSE_REGPARM_MAX,
2295 x86_64_int_return_registers, 0);
2296 /* For zero sized structures, construct_continer return NULL, but we need
2297 to keep rest of compiler happy by returning meaningfull value. */
2298 if (!ret)
2299 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2300 return ret;
2301 }
2302 else
b069de3b
SS
2303 return gen_rtx_REG (TYPE_MODE (valtype),
2304 ix86_value_regno (TYPE_MODE (valtype)));
53c17031
JH
2305}
2306
f5143c46 2307/* Return false iff type is returned in memory. */
53c17031
JH
2308int
2309ix86_return_in_memory (type)
2310 tree type;
2311{
2312 int needed_intregs, needed_sseregs;
2313 if (TARGET_64BIT)
2314 {
2315 return !examine_argument (TYPE_MODE (type), type, 1,
2316 &needed_intregs, &needed_sseregs);
2317 }
2318 else
2319 {
2320 if (TYPE_MODE (type) == BLKmode
2321 || (VECTOR_MODE_P (TYPE_MODE (type))
2322 && int_size_in_bytes (type) == 8)
2323 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2324 && TYPE_MODE (type) != TFmode
2325 && !VECTOR_MODE_P (TYPE_MODE (type))))
2326 return 1;
2327 return 0;
2328 }
2329}
2330
2331/* Define how to find the value returned by a library function
2332 assuming the value has mode MODE. */
2333rtx
2334ix86_libcall_value (mode)
2335 enum machine_mode mode;
2336{
2337 if (TARGET_64BIT)
2338 {
2339 switch (mode)
2340 {
2341 case SFmode:
2342 case SCmode:
2343 case DFmode:
2344 case DCmode:
2345 return gen_rtx_REG (mode, FIRST_SSE_REG);
2346 case TFmode:
2347 case TCmode:
2348 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2349 default:
2350 return gen_rtx_REG (mode, 0);
2351 }
2352 }
2353 else
b069de3b
SS
2354 return gen_rtx_REG (mode, ix86_value_regno (mode));
2355}
2356
2357/* Given a mode, return the register to use for a return value. */
2358
2359static int
2360ix86_value_regno (mode)
2361 enum machine_mode mode;
2362{
2363 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2364 return FIRST_FLOAT_REG;
2365 if (mode == TImode || VECTOR_MODE_P (mode))
2366 return FIRST_SSE_REG;
2367 return 0;
53c17031 2368}
ad919812
JH
2369\f
2370/* Create the va_list data type. */
53c17031 2371
ad919812
JH
2372tree
2373ix86_build_va_list ()
2374{
2375 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
53c17031 2376
ad919812
JH
2377 /* For i386 we use plain pointer to argument area. */
2378 if (!TARGET_64BIT)
2379 return build_pointer_type (char_type_node);
2380
f1e639b1 2381 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
ad919812
JH
2382 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2383
fce5a9f2 2384 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
ad919812 2385 unsigned_type_node);
fce5a9f2 2386 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
ad919812
JH
2387 unsigned_type_node);
2388 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2389 ptr_type_node);
2390 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2391 ptr_type_node);
2392
2393 DECL_FIELD_CONTEXT (f_gpr) = record;
2394 DECL_FIELD_CONTEXT (f_fpr) = record;
2395 DECL_FIELD_CONTEXT (f_ovf) = record;
2396 DECL_FIELD_CONTEXT (f_sav) = record;
2397
2398 TREE_CHAIN (record) = type_decl;
2399 TYPE_NAME (record) = type_decl;
2400 TYPE_FIELDS (record) = f_gpr;
2401 TREE_CHAIN (f_gpr) = f_fpr;
2402 TREE_CHAIN (f_fpr) = f_ovf;
2403 TREE_CHAIN (f_ovf) = f_sav;
2404
2405 layout_type (record);
2406
2407 /* The correct type is an array type of one element. */
2408 return build_array_type (record, build_index_type (size_zero_node));
2409}
2410
2411/* Perform any needed actions needed for a function that is receiving a
fce5a9f2 2412 variable number of arguments.
ad919812
JH
2413
2414 CUM is as above.
2415
2416 MODE and TYPE are the mode and type of the current parameter.
2417
2418 PRETEND_SIZE is a variable that should be set to the amount of stack
2419 that must be pushed by the prolog to pretend that our caller pushed
2420 it.
2421
2422 Normally, this macro will push all remaining incoming registers on the
2423 stack and set PRETEND_SIZE to the length of the registers pushed. */
2424
2425void
2426ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2427 CUMULATIVE_ARGS *cum;
2428 enum machine_mode mode;
2429 tree type;
2430 int *pretend_size ATTRIBUTE_UNUSED;
2431 int no_rtl;
2432
2433{
2434 CUMULATIVE_ARGS next_cum;
2435 rtx save_area = NULL_RTX, mem;
2436 rtx label;
2437 rtx label_ref;
2438 rtx tmp_reg;
2439 rtx nsse_reg;
2440 int set;
2441 tree fntype;
2442 int stdarg_p;
2443 int i;
2444
2445 if (!TARGET_64BIT)
2446 return;
2447
2448 /* Indicate to allocate space on the stack for varargs save area. */
2449 ix86_save_varrargs_registers = 1;
2450
2451 fntype = TREE_TYPE (current_function_decl);
2452 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2453 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2454 != void_type_node));
2455
2456 /* For varargs, we do not want to skip the dummy va_dcl argument.
2457 For stdargs, we do want to skip the last named argument. */
2458 next_cum = *cum;
2459 if (stdarg_p)
2460 function_arg_advance (&next_cum, mode, type, 1);
2461
2462 if (!no_rtl)
2463 save_area = frame_pointer_rtx;
2464
2465 set = get_varargs_alias_set ();
2466
2467 for (i = next_cum.regno; i < ix86_regparm; i++)
2468 {
2469 mem = gen_rtx_MEM (Pmode,
2470 plus_constant (save_area, i * UNITS_PER_WORD));
0692acba 2471 set_mem_alias_set (mem, set);
ad919812
JH
2472 emit_move_insn (mem, gen_rtx_REG (Pmode,
2473 x86_64_int_parameter_registers[i]));
2474 }
2475
2476 if (next_cum.sse_nregs)
2477 {
2478 /* Now emit code to save SSE registers. The AX parameter contains number
2479 of SSE parameter regsiters used to call this function. We use
2480 sse_prologue_save insn template that produces computed jump across
2481 SSE saves. We need some preparation work to get this working. */
2482
2483 label = gen_label_rtx ();
2484 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2485
2486 /* Compute address to jump to :
2487 label - 5*eax + nnamed_sse_arguments*5 */
2488 tmp_reg = gen_reg_rtx (Pmode);
2489 nsse_reg = gen_reg_rtx (Pmode);
2490 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2491 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
44cf5b6a 2492 gen_rtx_MULT (Pmode, nsse_reg,
ad919812
JH
2493 GEN_INT (4))));
2494 if (next_cum.sse_regno)
2495 emit_move_insn
2496 (nsse_reg,
2497 gen_rtx_CONST (DImode,
2498 gen_rtx_PLUS (DImode,
2499 label_ref,
2500 GEN_INT (next_cum.sse_regno * 4))));
2501 else
2502 emit_move_insn (nsse_reg, label_ref);
2503 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2504
2505 /* Compute address of memory block we save into. We always use pointer
2506 pointing 127 bytes after first byte to store - this is needed to keep
2507 instruction size limited by 4 bytes. */
2508 tmp_reg = gen_reg_rtx (Pmode);
8ac61af7
RK
2509 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2510 plus_constant (save_area,
2511 8 * REGPARM_MAX + 127)));
ad919812 2512 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
14f73b5a 2513 set_mem_alias_set (mem, set);
8ac61af7 2514 set_mem_align (mem, BITS_PER_WORD);
ad919812
JH
2515
2516 /* And finally do the dirty job! */
8ac61af7
RK
2517 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2518 GEN_INT (next_cum.sse_regno), label));
ad919812
JH
2519 }
2520
2521}
2522
2523/* Implement va_start. */
2524
2525void
e5faf155 2526ix86_va_start (valist, nextarg)
ad919812
JH
2527 tree valist;
2528 rtx nextarg;
2529{
2530 HOST_WIDE_INT words, n_gpr, n_fpr;
2531 tree f_gpr, f_fpr, f_ovf, f_sav;
2532 tree gpr, fpr, ovf, sav, t;
2533
2534 /* Only 64bit target needs something special. */
2535 if (!TARGET_64BIT)
2536 {
e5faf155 2537 std_expand_builtin_va_start (valist, nextarg);
ad919812
JH
2538 return;
2539 }
2540
2541 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2542 f_fpr = TREE_CHAIN (f_gpr);
2543 f_ovf = TREE_CHAIN (f_fpr);
2544 f_sav = TREE_CHAIN (f_ovf);
2545
2546 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2547 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2548 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2549 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2550 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2551
2552 /* Count number of gp and fp argument registers used. */
2553 words = current_function_args_info.words;
2554 n_gpr = current_function_args_info.regno;
2555 n_fpr = current_function_args_info.sse_regno;
2556
2557 if (TARGET_DEBUG_ARG)
2558 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
b531087a 2559 (int) words, (int) n_gpr, (int) n_fpr);
ad919812
JH
2560
2561 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2562 build_int_2 (n_gpr * 8, 0));
2563 TREE_SIDE_EFFECTS (t) = 1;
2564 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2565
2566 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2567 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2568 TREE_SIDE_EFFECTS (t) = 1;
2569 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2570
2571 /* Find the overflow area. */
2572 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2573 if (words != 0)
2574 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2575 build_int_2 (words * UNITS_PER_WORD, 0));
2576 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2577 TREE_SIDE_EFFECTS (t) = 1;
2578 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2579
2580 /* Find the register save area.
2581 Prologue of the function save it right above stack frame. */
2582 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2583 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2584 TREE_SIDE_EFFECTS (t) = 1;
2585 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2586}
2587
/* Implement va_arg.

   Emit RTL that fetches the next variadic argument of TYPE from the
   va_list VALIST and return an rtx holding the address of the value.

   On 32-bit targets this is the generic stack-walking implementation.
   On x86-64, arguments may live either in the register save area
   (integer/SSE registers spilled by the prologue) or in the stack
   overflow area, so a runtime test of the va_list offset counters
   selects the source at each call site.  */
rtx
ix86_va_arg (valist, type)
     tree valist, type;
{
  /* Identity map of the integer argument registers, as passed to
     construct_container to describe the call-site layout.  */
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  rtx lab_false, lab_over = NULL_RTX;
  rtx addr_rtx, r;
  rtx container;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT)
    {
      return std_expand_builtin_va_arg (valist, type);
    }

  /* The four fields of the x86-64 va_list record:
     gp_offset, fp_offset, overflow_arg_area, reg_save_area.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);

  /* Argument size in bytes and in words, rounded up.  */
  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Classify the argument as the caller would have passed it; a NULL
     container means it is passed in memory (overflow area only).  */
  container = construct_container (TYPE_MODE (type), type, 0,
				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
  /*
   * Pull the value out of the saved registers ...
   */

  addr_rtx = gen_reg_rtx (Pmode);

  if (container)
    {
      rtx int_addr_rtx, sse_addr_rtx;
      int needed_intregs, needed_sseregs;
      int need_temp;

      lab_over = gen_label_rtx ();
      lab_false = gen_label_rtx ();

      examine_argument (TYPE_MODE (type), type, 0,
			&needed_intregs, &needed_sseregs);


      /* Over-aligned types cannot be read directly from the save area
	 (entries there are only 8/16-byte aligned); use a temporary.  */
      need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
		   || TYPE_ALIGN (type) > 128);

      /* In case we are passing structure, verify that it is consecutive block
         on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = 1;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = 1;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr_rtx = addr_rtx;
	  sse_addr_rtx = addr_rtx;
	}
      else
	{
	  int_addr_rtx = gen_reg_rtx (Pmode);
	  sse_addr_rtx = gen_reg_rtx (Pmode);
	}
      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  emit_cmp_and_jump_insns (expand_expr
				   (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
				   GEN_INT ((REGPARM_MAX - needed_intregs +
					     1) * 8), GE, const1_rtx, SImode,
				   1, lab_false);
	}
      if (needed_sseregs)
	{
	  emit_cmp_and_jump_insns (expand_expr
				   (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
				   GEN_INT ((SSE_REGPARM_MAX -
					     needed_sseregs + 1) * 16 +
					    REGPARM_MAX * 8), GE, const1_rtx,
				   SImode, 1, lab_false);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
	  r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
	  if (r != int_addr_rtx)
	    emit_move_insn (int_addr_rtx, r);
	}
      if (needed_sseregs)
	{
	  t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
	  r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
	  if (r != sse_addr_rtx)
	    emit_move_insn (sse_addr_rtx, r);
	}
      if (need_temp)
	{
	  int i;
	  rtx mem;

	  /* Never use the memory itself, as it has the alias set.  */
	  addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
	  mem = gen_rtx_MEM (BLKmode, addr_rtx);
	  set_mem_alias_set (mem, get_varargs_alias_set ());
	  set_mem_align (mem, BITS_PER_UNIT);

	  /* Copy each register-sized piece from the save area into the
	     temporary at the offset the container prescribes.  */
	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      enum machine_mode mode = GET_MODE (reg);
	      rtx src_addr;
	      rtx src_mem;
	      int src_offset;
	      rtx dest_mem;

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr_rtx;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr_rtx;
		  src_offset = REGNO (reg) * 8;
		}
	      src_mem = gen_rtx_MEM (mode, src_addr);
	      set_mem_alias_set (src_mem, get_varargs_alias_set ());
	      src_mem = adjust_address (src_mem, mode, src_offset);
	      dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
	      emit_move_insn (dest_mem, src_mem);
	    }
	}

      /* Advance the gp_offset / fp_offset counters past what we used.  */
      if (needed_intregs)
	{
	  t =
	    build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		   build_int_2 (needed_intregs * 8, 0));
	  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
	  TREE_SIDE_EFFECTS (t) = 1;
	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
	}
      if (needed_sseregs)
	{
	  t =
	    build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		   build_int_2 (needed_sseregs * 16, 0));
	  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
	  TREE_SIDE_EFFECTS (t) = 1;
	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
	}

      emit_jump_insn (gen_jump (lab_over));
      emit_barrier ();
      emit_label (lab_false);
    }

  /* ... otherwise out of the overflow area.  */

  /* Care for on-stack alignment if needed.  */
  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
    t = ovf;
  else
    {
      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
      t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
      t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
    }
  t = save_expr (t);

  r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
  if (r != addr_rtx)
    emit_move_insn (addr_rtx, r);

  /* Bump overflow_arg_area past the argument just consumed.  */
  t =
    build (PLUS_EXPR, TREE_TYPE (t), t,
	   build_int_2 (rsize * UNITS_PER_WORD, 0));
  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (container)
    emit_label (lab_over);

  return addr_rtx;
}
2811\f
c3c637e3
GS
2812/* Return nonzero if OP is either a i387 or SSE fp register. */
2813int
2814any_fp_register_operand (op, mode)
2815 rtx op;
2816 enum machine_mode mode ATTRIBUTE_UNUSED;
2817{
2818 return ANY_FP_REG_P (op);
2819}
2820
2821/* Return nonzero if OP is an i387 fp register. */
2822int
2823fp_register_operand (op, mode)
2824 rtx op;
2825 enum machine_mode mode ATTRIBUTE_UNUSED;
2826{
2827 return FP_REG_P (op);
2828}
2829
2830/* Return nonzero if OP is a non-fp register_operand. */
2831int
2832register_and_not_any_fp_reg_operand (op, mode)
2833 rtx op;
2834 enum machine_mode mode;
2835{
2836 return register_operand (op, mode) && !ANY_FP_REG_P (op);
2837}
2838
2839/* Return nonzero of OP is a register operand other than an
2840 i387 fp register. */
2841int
2842register_and_not_fp_reg_operand (op, mode)
2843 rtx op;
2844 enum machine_mode mode;
2845{
2846 return register_operand (op, mode) && !FP_REG_P (op);
2847}
2848
7dd4b4a3
JH
2849/* Return nonzero if OP is general operand representable on x86_64. */
2850
2851int
2852x86_64_general_operand (op, mode)
2853 rtx op;
2854 enum machine_mode mode;
2855{
2856 if (!TARGET_64BIT)
2857 return general_operand (op, mode);
2858 if (nonimmediate_operand (op, mode))
2859 return 1;
2860 return x86_64_sign_extended_value (op);
2861}
2862
2863/* Return nonzero if OP is general operand representable on x86_64
d6a7951f 2864 as either sign extended or zero extended constant. */
7dd4b4a3
JH
2865
2866int
2867x86_64_szext_general_operand (op, mode)
2868 rtx op;
2869 enum machine_mode mode;
2870{
2871 if (!TARGET_64BIT)
2872 return general_operand (op, mode);
2873 if (nonimmediate_operand (op, mode))
2874 return 1;
2875 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2876}
2877
2878/* Return nonzero if OP is nonmemory operand representable on x86_64. */
2879
2880int
2881x86_64_nonmemory_operand (op, mode)
2882 rtx op;
2883 enum machine_mode mode;
2884{
2885 if (!TARGET_64BIT)
2886 return nonmemory_operand (op, mode);
2887 if (register_operand (op, mode))
2888 return 1;
2889 return x86_64_sign_extended_value (op);
2890}
2891
2892/* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2893
2894int
2895x86_64_movabs_operand (op, mode)
2896 rtx op;
2897 enum machine_mode mode;
2898{
2899 if (!TARGET_64BIT || !flag_pic)
2900 return nonmemory_operand (op, mode);
2901 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2902 return 1;
2903 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2904 return 1;
2905 return 0;
2906}
2907
2908/* Return nonzero if OP is nonmemory operand representable on x86_64. */
2909
2910int
2911x86_64_szext_nonmemory_operand (op, mode)
2912 rtx op;
2913 enum machine_mode mode;
2914{
2915 if (!TARGET_64BIT)
2916 return nonmemory_operand (op, mode);
2917 if (register_operand (op, mode))
2918 return 1;
2919 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2920}
2921
2922/* Return nonzero if OP is immediate operand representable on x86_64. */
2923
2924int
2925x86_64_immediate_operand (op, mode)
2926 rtx op;
2927 enum machine_mode mode;
2928{
2929 if (!TARGET_64BIT)
2930 return immediate_operand (op, mode);
2931 return x86_64_sign_extended_value (op);
2932}
2933
2934/* Return nonzero if OP is immediate operand representable on x86_64. */
2935
2936int
2937x86_64_zext_immediate_operand (op, mode)
2938 rtx op;
2939 enum machine_mode mode ATTRIBUTE_UNUSED;
2940{
2941 return x86_64_zero_extended_value (op);
2942}
2943
8bad7136
JL
2944/* Return nonzero if OP is (const_int 1), else return zero. */
2945
2946int
2947const_int_1_operand (op, mode)
2948 rtx op;
2949 enum machine_mode mode ATTRIBUTE_UNUSED;
2950{
2951 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2952}
2953
794a292d
JJ
2954/* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2955 for shift & compare patterns, as shifting by 0 does not change flags),
2956 else return zero. */
2957
2958int
2959const_int_1_31_operand (op, mode)
2960 rtx op;
2961 enum machine_mode mode ATTRIBUTE_UNUSED;
2962{
2963 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
2964}
2965
e075ae69
RH
/* Returns 1 if OP is either a symbol reference or a sum of a symbol
   reference and a constant.  */

int
symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case CONST:
      op = XEXP (op, 0);
      /* A symbol, a label, or one of the PIC-related UNSPEC wrappers
	 directly inside the CONST qualifies.  */
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == UNSPEC
	      && (XINT (op, 1) == UNSPEC_GOT
		  || XINT (op, 1) == UNSPEC_GOTOFF
		  || XINT (op, 1) == UNSPEC_GOTPCREL)))
	return 1;
      /* Otherwise only a (plus X const_int) form can qualify.  */
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;

      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      /* Only @GOTOFF gets offsets.  */
      if (GET_CODE (op) != UNSPEC
	  || XINT (op, 1) != UNSPEC_GOTOFF)
	return 0;

      /* Look at the symbol inside the @GOTOFF unspec.  */
      op = XVECEXP (op, 0, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      return 0;

    default:
      return 0;
    }
}
2a2ab3f9 3012
/* Return true if the operand contains a @GOT or @GOTOFF reference.  */

int
pic_symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  /* Such references always appear wrapped in a CONST.  */
  if (GET_CODE (op) != CONST)
    return 0;
  op = XEXP (op, 0);
  if (TARGET_64BIT)
    {
      /* On 64-bit, the UNSPEC sits one level deeper (inside a PLUS
	 or similar) -- check the first operand only.  */
      if (GET_CODE (XEXP (op, 0)) == UNSPEC)
	return 1;
    }
  else
    {
      /* On 32-bit, accept a bare UNSPEC or (plus UNSPEC const_int).  */
      if (GET_CODE (op) == UNSPEC)
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
    }
  return 0;
}
2a2ab3f9 3041
623fe810
RH
/* Return true if OP is a symbolic operand that resolves locally.  */

static int
local_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  /* Labels always resolve within the current object.  */
  if (GET_CODE (op) == LABEL_REF)
    return 1;

  /* Strip a constant offset: (const (plus SYM N)) -> SYM.  */
  if (GET_CODE (op) == CONST
      && GET_CODE (XEXP (op, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
    op = XEXP (XEXP (op, 0), 0);

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  /* These we've been told are local by varasm and encode_section_info
     respectively.  */
  if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
    return 1;

  /* There is, however, a not insubstantial body of code in the rest of
     the compiler that assumes it can just stick the results of
     ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done.  */
  /* ??? This is a hack.  Should update the body of the compiler to
     always create a DECL and invoke targetm.encode_section_info.  */
  if (strncmp (XSTR (op, 0), internal_label_prefix,
	       internal_label_prefix_len) == 0)
    return 1;

  return 0;
}
3076
f996902d
RH
3077/* Test for various thread-local symbols. See ix86_encode_section_info. */
3078
3079int
3080tls_symbolic_operand (op, mode)
3081 register rtx op;
3082 enum machine_mode mode ATTRIBUTE_UNUSED;
3083{
3084 const char *symbol_str;
3085
3086 if (GET_CODE (op) != SYMBOL_REF)
3087 return 0;
3088 symbol_str = XSTR (op, 0);
3089
3090 if (symbol_str[0] != '%')
3091 return 0;
755ac5d4 3092 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
f996902d
RH
3093}
3094
3095static int
3096tls_symbolic_operand_1 (op, kind)
3097 rtx op;
3098 enum tls_model kind;
3099{
3100 const char *symbol_str;
3101
3102 if (GET_CODE (op) != SYMBOL_REF)
3103 return 0;
3104 symbol_str = XSTR (op, 0);
3105
3106 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3107}
3108
3109int
3110global_dynamic_symbolic_operand (op, mode)
3111 register rtx op;
3112 enum machine_mode mode ATTRIBUTE_UNUSED;
3113{
3114 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3115}
3116
3117int
3118local_dynamic_symbolic_operand (op, mode)
3119 register rtx op;
3120 enum machine_mode mode ATTRIBUTE_UNUSED;
3121{
3122 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3123}
3124
3125int
3126initial_exec_symbolic_operand (op, mode)
3127 register rtx op;
3128 enum machine_mode mode ATTRIBUTE_UNUSED;
3129{
3130 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3131}
3132
3133int
3134local_exec_symbolic_operand (op, mode)
3135 register rtx op;
3136 enum machine_mode mode ATTRIBUTE_UNUSED;
3137{
3138 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3139}
3140
28d52ffb
RH
3141/* Test for a valid operand for a call instruction. Don't allow the
3142 arg pointer register or virtual regs since they may decay into
3143 reg + const, which the patterns can't handle. */
2a2ab3f9 3144
e075ae69
RH
3145int
3146call_insn_operand (op, mode)
3147 rtx op;
3148 enum machine_mode mode ATTRIBUTE_UNUSED;
3149{
e075ae69
RH
3150 /* Disallow indirect through a virtual register. This leads to
3151 compiler aborts when trying to eliminate them. */
3152 if (GET_CODE (op) == REG
3153 && (op == arg_pointer_rtx
564d80f4 3154 || op == frame_pointer_rtx
e075ae69
RH
3155 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3156 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3157 return 0;
2a2ab3f9 3158
28d52ffb
RH
3159 /* Disallow `call 1234'. Due to varying assembler lameness this
3160 gets either rejected or translated to `call .+1234'. */
3161 if (GET_CODE (op) == CONST_INT)
3162 return 0;
3163
cbbf65e0
RH
3164 /* Explicitly allow SYMBOL_REF even if pic. */
3165 if (GET_CODE (op) == SYMBOL_REF)
e075ae69 3166 return 1;
2a2ab3f9 3167
cbbf65e0
RH
3168 /* Otherwise we can allow any general_operand in the address. */
3169 return general_operand (op, Pmode);
e075ae69 3170}
79325812 3171
e075ae69
RH
3172int
3173constant_call_address_operand (op, mode)
3174 rtx op;
3175 enum machine_mode mode ATTRIBUTE_UNUSED;
3176{
eaf19aba
JJ
3177 if (GET_CODE (op) == CONST
3178 && GET_CODE (XEXP (op, 0)) == PLUS
3179 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3180 op = XEXP (XEXP (op, 0), 0);
e1ff012c 3181 return GET_CODE (op) == SYMBOL_REF;
e075ae69 3182}
2a2ab3f9 3183
e075ae69 3184/* Match exactly zero and one. */
e9a25f70 3185
0f290768 3186int
e075ae69
RH
3187const0_operand (op, mode)
3188 register rtx op;
3189 enum machine_mode mode;
3190{
3191 return op == CONST0_RTX (mode);
3192}
e9a25f70 3193
0f290768 3194int
e075ae69
RH
3195const1_operand (op, mode)
3196 register rtx op;
3197 enum machine_mode mode ATTRIBUTE_UNUSED;
3198{
3199 return op == const1_rtx;
3200}
2a2ab3f9 3201
e075ae69 3202/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 3203
e075ae69
RH
3204int
3205const248_operand (op, mode)
3206 register rtx op;
3207 enum machine_mode mode ATTRIBUTE_UNUSED;
3208{
3209 return (GET_CODE (op) == CONST_INT
3210 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3211}
e9a25f70 3212
e075ae69 3213/* True if this is a constant appropriate for an increment or decremenmt. */
81fd0956 3214
e075ae69
RH
3215int
3216incdec_operand (op, mode)
3217 register rtx op;
0631e0bf 3218 enum machine_mode mode ATTRIBUTE_UNUSED;
e075ae69 3219{
f5143c46 3220 /* On Pentium4, the inc and dec operations causes extra dependency on flag
b4e89e2d
JH
3221 registers, since carry flag is not set. */
3222 if (TARGET_PENTIUM4 && !optimize_size)
3223 return 0;
2b1c08f5 3224 return op == const1_rtx || op == constm1_rtx;
e075ae69 3225}
2a2ab3f9 3226
371bc54b
JH
3227/* Return nonzero if OP is acceptable as operand of DImode shift
3228 expander. */
3229
3230int
3231shiftdi_operand (op, mode)
3232 rtx op;
3233 enum machine_mode mode ATTRIBUTE_UNUSED;
3234{
3235 if (TARGET_64BIT)
3236 return nonimmediate_operand (op, mode);
3237 else
3238 return register_operand (op, mode);
3239}
3240
0f290768 3241/* Return false if this is the stack pointer, or any other fake
e075ae69
RH
3242 register eliminable to the stack pointer. Otherwise, this is
3243 a register operand.
2a2ab3f9 3244
e075ae69
RH
3245 This is used to prevent esp from being used as an index reg.
3246 Which would only happen in pathological cases. */
5f1ec3e6 3247
e075ae69
RH
3248int
3249reg_no_sp_operand (op, mode)
3250 register rtx op;
3251 enum machine_mode mode;
3252{
3253 rtx t = op;
3254 if (GET_CODE (t) == SUBREG)
3255 t = SUBREG_REG (t);
564d80f4 3256 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 3257 return 0;
2a2ab3f9 3258
e075ae69 3259 return register_operand (op, mode);
2a2ab3f9 3260}
b840bfb0 3261
915119a5
BS
3262int
3263mmx_reg_operand (op, mode)
3264 register rtx op;
bd793c65 3265 enum machine_mode mode ATTRIBUTE_UNUSED;
915119a5
BS
3266{
3267 return MMX_REG_P (op);
3268}
3269
2c5a510c
RH
3270/* Return false if this is any eliminable register. Otherwise
3271 general_operand. */
3272
3273int
3274general_no_elim_operand (op, mode)
3275 register rtx op;
3276 enum machine_mode mode;
3277{
3278 rtx t = op;
3279 if (GET_CODE (t) == SUBREG)
3280 t = SUBREG_REG (t);
3281 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3282 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3283 || t == virtual_stack_dynamic_rtx)
3284 return 0;
1020a5ab
RH
3285 if (REG_P (t)
3286 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3287 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3288 return 0;
2c5a510c
RH
3289
3290 return general_operand (op, mode);
3291}
3292
3293/* Return false if this is any eliminable register. Otherwise
3294 register_operand or const_int. */
3295
3296int
3297nonmemory_no_elim_operand (op, mode)
3298 register rtx op;
3299 enum machine_mode mode;
3300{
3301 rtx t = op;
3302 if (GET_CODE (t) == SUBREG)
3303 t = SUBREG_REG (t);
3304 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3305 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3306 || t == virtual_stack_dynamic_rtx)
3307 return 0;
3308
3309 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3310}
3311
7ec70495
JH
/* Return false if this is any eliminable register or stack register,
   otherwise work like register_operand.  */

int
index_register_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (!REG_P (t))
    return 0;
  /* The stack pointer cannot be used as an index register, and the
     eliminable/virtual registers may decay to esp after elimination.  */
  if (t == arg_pointer_rtx
      || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx
      || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx
      || REGNO (t) == STACK_POINTER_REGNUM)
    return 0;

  /* NOTE(review): despite the comment above saying "work like
     register_operand", this delegates to general_operand; after the
     REG_P check above the two should accept the same operands, but
     confirm before changing.  */
  return general_operand (op, mode);
}
3335
e075ae69 3336/* Return true if op is a Q_REGS class register. */
b840bfb0 3337
e075ae69
RH
3338int
3339q_regs_operand (op, mode)
3340 register rtx op;
3341 enum machine_mode mode;
b840bfb0 3342{
e075ae69
RH
3343 if (mode != VOIDmode && GET_MODE (op) != mode)
3344 return 0;
3345 if (GET_CODE (op) == SUBREG)
3346 op = SUBREG_REG (op);
7799175f 3347 return ANY_QI_REG_P (op);
0f290768 3348}
b840bfb0 3349
e075ae69 3350/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 3351
e075ae69
RH
3352int
3353non_q_regs_operand (op, mode)
3354 register rtx op;
3355 enum machine_mode mode;
3356{
3357 if (mode != VOIDmode && GET_MODE (op) != mode)
3358 return 0;
3359 if (GET_CODE (op) == SUBREG)
3360 op = SUBREG_REG (op);
3361 return NON_QI_REG_P (op);
0f290768 3362}
b840bfb0 3363
915119a5
BS
/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
   insns.  */
int
sse_comparison_operator (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  enum rtx_code code = GET_CODE (op);
  switch (code)
    {
      /* Operations supported directly.  */
    case EQ:
    case LT:
    case LE:
    case UNORDERED:
    case NE:
    case UNGE:
    case UNGT:
    case ORDERED:
      return 1;
      /* These are equivalent to ones above in non-IEEE comparisons.
	 (e.g. GE can be treated as UNGE when qNaN signalling is not
	 required).  */
    case UNEQ:
    case UNLT:
    case UNLE:
    case LTGT:
    case GE:
    case GT:
      return !TARGET_IEEE_FP;
    default:
      return 0;
    }
}
/* Return 1 if OP is a valid comparison operator in valid mode.  */
int
ix86_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));

  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      /* FP comparisons are valid only when they need no helper
	 comparison (no bypass and no second code).  */
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      return (bypass_code == NIL && second_code == NIL);
    }
  /* Integer comparisons: which codes are valid depends on which flags
     the producing instruction actually set (the CC mode).  */
  switch (code)
    {
    case EQ: case NE:
      return 1;
    case LT: case GE:
      if (inmode == CCmode || inmode == CCGCmode
	  || inmode == CCGOCmode || inmode == CCNOmode)
	return 1;
      return 0;
    case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
      /* Unsigned codes need the full flags set.  */
      if (inmode == CCmode)
	return 1;
      return 0;
    case GT: case LE:
      if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
	return 1;
      return 0;
    default:
      return 0;
    }
}
3437
/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */

int
fcmov_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));
  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      /* Reduce the FP comparison to the integer condition code it
	 would test after the fcom/fcomi; reject comparisons needing
	 helper jumps.  */
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	return 0;
      code = ix86_fp_compare_code_to_integer (code);
    }
  /* i387 supports just limited amount of conditional codes.  */
  switch (code)
    {
    case LTU: case GTU: case LEU: case GEU:
      if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
	return 1;
      return 0;
    case ORDERED: case UNORDERED:
    case EQ: case NE:
      return 1;
    default:
      return 0;
    }
}
b840bfb0 3474
e9e80858
JH
3475/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3476
3477int
3478promotable_binary_operator (op, mode)
3479 register rtx op;
3480 enum machine_mode mode ATTRIBUTE_UNUSED;
3481{
3482 switch (GET_CODE (op))
3483 {
3484 case MULT:
3485 /* Modern CPUs have same latency for HImode and SImode multiply,
3486 but 386 and 486 do HImode multiply faster. */
3487 return ix86_cpu > PROCESSOR_I486;
3488 case PLUS:
3489 case AND:
3490 case IOR:
3491 case XOR:
3492 case ASHIFT:
3493 return 1;
3494 default:
3495 return 0;
3496 }
3497}
3498
e075ae69
RH
3499/* Nearly general operand, but accept any const_double, since we wish
3500 to be able to drop them into memory rather than have them get pulled
3501 into registers. */
b840bfb0 3502
2a2ab3f9 3503int
e075ae69
RH
3504cmp_fp_expander_operand (op, mode)
3505 register rtx op;
3506 enum machine_mode mode;
2a2ab3f9 3507{
e075ae69 3508 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 3509 return 0;
e075ae69 3510 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 3511 return 1;
e075ae69 3512 return general_operand (op, mode);
2a2ab3f9
JVA
3513}
3514
/* Match an SI or HImode register for a zero_extract.  */

int
ext_register_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  int regno;
  /* DImode is acceptable only on 64-bit targets; SImode and HImode
     are acceptable everywhere.  */
  if ((!TARGET_64BIT || GET_MODE (op) != DImode)
      && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
    return 0;

  if (!register_operand (op, VOIDmode))
    return 0;

  /* Be careful to accept only registers having upper parts
     (ax/bx/cx/dx, i.e. hard regs 0-3, or any pseudo).  */
  regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
  return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
}
3534
3535/* Return 1 if this is a valid binary floating-point operation.
0f290768 3536 OP is the expression matched, and MODE is its mode. */
e075ae69
RH
3537
3538int
3539binary_fp_operator (op, mode)
3540 register rtx op;
3541 enum machine_mode mode;
3542{
3543 if (mode != VOIDmode && mode != GET_MODE (op))
3544 return 0;
3545
2a2ab3f9
JVA
3546 switch (GET_CODE (op))
3547 {
e075ae69
RH
3548 case PLUS:
3549 case MINUS:
3550 case MULT:
3551 case DIV:
3552 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 3553
2a2ab3f9
JVA
3554 default:
3555 return 0;
3556 }
3557}
fee2770d 3558
e075ae69 3559int
b531087a 3560mult_operator (op, mode)
e075ae69
RH
3561 register rtx op;
3562 enum machine_mode mode ATTRIBUTE_UNUSED;
3563{
3564 return GET_CODE (op) == MULT;
3565}
3566
3567int
b531087a 3568div_operator (op, mode)
e075ae69
RH
3569 register rtx op;
3570 enum machine_mode mode ATTRIBUTE_UNUSED;
3571{
3572 return GET_CODE (op) == DIV;
3573}
0a726ef1
JL
3574
3575int
e075ae69
RH
3576arith_or_logical_operator (op, mode)
3577 rtx op;
3578 enum machine_mode mode;
0a726ef1 3579{
e075ae69
RH
3580 return ((mode == VOIDmode || GET_MODE (op) == mode)
3581 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3582 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
0a726ef1
JL
3583}
3584
/* Returns 1 if OP is memory operand with a displacement.  */

int
memory_displacement_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (! memory_operand (op, mode))
    return 0;

  /* memory_operand guarantees a valid address, so decomposition
     must succeed here.  */
  if (! ix86_decompose_address (XEXP (op, 0), &parts))
    abort ();

  return parts.disp != NULL_RTX;
}
3602
/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (nonimmediate_operand (op, mode))
    return 1;

  /* Additionally accept the exact shape produced for testqi_ext_ccno_0:
     (and (zero_extract REG (const_int 8) (const_int 8)) (const_int N)),
     i.e. a test of the %ah/%bh/%ch/%dh byte against a constant.  */
  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}
d784886d 3629
e075ae69
RH
3630/* Returns 1 if OP is memory operand that can not be represented by the
3631 modRM array. */
d784886d
RK
3632
3633int
e075ae69 3634long_memory_operand (op, mode)
d784886d
RK
3635 register rtx op;
3636 enum machine_mode mode;
3637{
e075ae69 3638 if (! memory_operand (op, mode))
d784886d
RK
3639 return 0;
3640
e075ae69 3641 return memory_address_length (op) != 0;
d784886d 3642}
2247f6ed
JH
3643
/* Return nonzero if the rtx is known aligned.  */

int
aligned_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (!general_operand (op, mode))
    return 0;

  /* Registers and immediate operands are always "aligned".  */
  if (GET_CODE (op) != MEM)
    return 1;

  /* Don't even try to do any aligned optimizations with volatiles.  */
  if (MEM_VOLATILE_P (op))
    return 0;

  op = XEXP (op, 0);

  /* Pushes and pops are only valid on the stack pointer.  */
  if (GET_CODE (op) == PRE_DEC
      || GET_CODE (op) == POST_INC)
    return 1;

  /* Decode the address.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  /* Look through SUBREGs so the alignment check below sees the
     underlying hard/pseudo register.  */
  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* Look for some component that isn't known to be aligned.  */
  if (parts.index)
    {
      /* A scale of 4 or more keeps the index a multiple of 4
	 regardless of the register's own alignment.  */
      if (parts.scale < 4
	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
	return 0;
    }
  if (parts.base)
    {
      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
	return 0;
    }
  if (parts.disp)
    {
      /* The displacement must be a multiple of 4 to preserve the
	 register components' 32-bit alignment.  */
      if (GET_CODE (parts.disp) != CONST_INT
	  || (INTVAL (parts.disp) & 3) != 0)
	return 0;
    }

  /* Didn't find one -- this must be an aligned address.  */
  return 1;
}
e075ae69
RH
3702\f
3703/* Return true if the constant is something that can be loaded with
3704 a special instruction. Only handle 0.0 and 1.0; others are less
3705 worthwhile. */
57dbca5e
BS
3706
3707int
e075ae69
RH
3708standard_80387_constant_p (x)
3709 rtx x;
57dbca5e 3710{
2b04e52b 3711 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
e075ae69 3712 return -1;
2b04e52b
JH
3713 /* Note that on the 80387, other constants, such as pi, that we should support
3714 too. On some machines, these are much slower to load as standard constant,
3715 than to load from doubles in memory. */
3716 if (x == CONST0_RTX (GET_MODE (x)))
3717 return 1;
3718 if (x == CONST1_RTX (GET_MODE (x)))
3719 return 2;
e075ae69 3720 return 0;
57dbca5e
BS
3721}
3722
2b04e52b
JH
3723/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3724 */
3725int
3726standard_sse_constant_p (x)
3727 rtx x;
3728{
3729 if (GET_CODE (x) != CONST_DOUBLE)
3730 return -1;
3731 return (x == CONST0_RTX (GET_MODE (x)));
3732}
3733
2a2ab3f9
JVA
3734/* Returns 1 if OP contains a symbol reference */
3735
3736int
3737symbolic_reference_mentioned_p (op)
3738 rtx op;
3739{
6f7d635c 3740 register const char *fmt;
2a2ab3f9
JVA
3741 register int i;
3742
3743 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3744 return 1;
3745
3746 fmt = GET_RTX_FORMAT (GET_CODE (op));
3747 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3748 {
3749 if (fmt[i] == 'E')
3750 {
3751 register int j;
3752
3753 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3754 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3755 return 1;
3756 }
e9a25f70 3757
2a2ab3f9
JVA
3758 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3759 return 1;
3760 }
3761
3762 return 0;
3763}
e075ae69
RH
3764
/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p ()
{
  struct ix86_frame frame;

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32 pop, since that's all we can do
     with one instruction.  (NOTE(review): the check below is against
     32768 bytes, presumably the `ret imm16' operand limit -- the "32"
     in this inherited comment looks stale; confirm against the ISA.)  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  /* A bare `ret' is only valid when nothing remains to deallocate or
     restore.  */
  ix86_compute_frame_layout (&frame);
  return frame.to_allocate == 0 && frame.nregs == 0;
}
6189a572
JH
3799\f
/* Return 1 if VALUE can be stored in the sign extended immediate field,
   i.e. it fits in a 32-bit immediate that the hardware sign-extends to
   64 bits.  Symbolic operands are accepted depending on the active
   x86-64 code model (ix86_cmodel).  */

int
x86_64_sign_extended_value (value)
     rtx value;
{
  switch (GET_CODE (value))
    {
      /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
	 to be at least 32 and this all acceptable constants are
	 represented as CONST_INT.  */
    case CONST_INT:
      if (HOST_BITS_PER_WIDE_INT == 32)
	/* A 32-bit host CONST_INT always fits a sign-extended imm32.  */
	return 1;
      else
	{
	  /* Check that the value survives a DImode -> SImode -> DImode
	     round trip, i.e. is representable in 32 sign-extended bits.  */
	  HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
	  return trunc_int_for_mode (val, SImode) == val;
	}
      break;

      /* For certain code models, the symbolic references are known to fit.  */
    case SYMBOL_REF:
      return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;

      /* For certain code models, the code is near as well.  */
    case LABEL_REF:
      return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;

      /* We also may accept the offsetted memory references in certain special
	 cases.  */
    case CONST:
      /* GOT-relative references are always 32-bit.  */
      if (GET_CODE (XEXP (value, 0)) == UNSPEC
	  && XINT (XEXP (value, 0), 1) == UNSPEC_GOTPCREL)
	return 1;
      else if (GET_CODE (XEXP (value, 0)) == PLUS)
	{
	  rtx op1 = XEXP (XEXP (value, 0), 0);
	  rtx op2 = XEXP (XEXP (value, 0), 1);
	  HOST_WIDE_INT offset;

	  if (ix86_cmodel == CM_LARGE)
	    return 0;
	  if (GET_CODE (op2) != CONST_INT)
	    return 0;
	  offset = trunc_int_for_mode (INTVAL (op2), DImode);
	  switch (GET_CODE (op1))
	    {
	    case SYMBOL_REF:
	      /* For CM_SMALL assume that latest object is 1MB before
		 end of 31bits boundary.  We may also accept pretty
		 large negative constants knowing that all objects are
		 in the positive half of address space.  */
	      if (ix86_cmodel == CM_SMALL
		  && offset < 1024*1024*1024
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      /* For CM_KERNEL we know that all object resist in the
		 negative half of 32bits address space.  We may not
		 accept negative offsets, since they may be just off
		 and we may accept pretty large positive ones.  */
	      if (ix86_cmodel == CM_KERNEL
		  && offset > 0
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      break;
	    case LABEL_REF:
	      /* These conditions are similar to SYMBOL_REF ones, just the
		 constraints for code models differ.  */
	      if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
		  && offset < 1024*1024*1024
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      if (ix86_cmodel == CM_KERNEL
		  && offset > 0
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      break;
	    default:
	      return 0;
	    }
	}
      return 0;
    default:
      return 0;
    }
}
3886
/* Return 1 if VALUE can be stored in the zero extended immediate field,
   i.e. it fits in an unsigned 32-bit immediate.  Symbolic operands are
   accepted depending on the active x86-64 code model (ix86_cmodel).  */

int
x86_64_zero_extended_value (value)
     rtx value;
{
  switch (GET_CODE (value))
    {
    case CONST_DOUBLE:
      if (HOST_BITS_PER_WIDE_INT == 32)
	/* On 32-bit hosts, wide integers live in CONST_DOUBLEs with
	   VOIDmode; they fit iff the high half is zero.  */
	return (GET_MODE (value) == VOIDmode
		&& !CONST_DOUBLE_HIGH (value));
      else
	return 0;
    case CONST_INT:
      if (HOST_BITS_PER_WIDE_INT == 32)
	return INTVAL (value) >= 0;
      else
	/* Fits iff no bits above the low 32 are set.  */
	return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
      break;

      /* For certain code models, the symbolic references are known to fit.  */
    case SYMBOL_REF:
      return ix86_cmodel == CM_SMALL;

      /* For certain code models, the code is near as well.  */
    case LABEL_REF:
      return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;

      /* We also may accept the offsetted memory references in certain special
	 cases.  */
    case CONST:
      if (GET_CODE (XEXP (value, 0)) == PLUS)
	{
	  rtx op1 = XEXP (XEXP (value, 0), 0);
	  rtx op2 = XEXP (XEXP (value, 0), 1);

	  if (ix86_cmodel == CM_LARGE)
	    return 0;
	  switch (GET_CODE (op1))
	    {
	    case SYMBOL_REF:
	      return 0;
	      /* NOTE(review): everything from here to the `break' below is
		 unreachable because of the `return 0' above -- symbol+offset
		 is always rejected.  Looks intentional (conservative), but
		 confirm before relying on the small-model acceptance text.  */
	      /* For small code model we may accept pretty large positive
		 offsets, since one bit is available for free.  Negative
		 offsets are limited by the size of NULL pointer area
		 specified by the ABI.  */
	      if (ix86_cmodel == CM_SMALL
		  && GET_CODE (op2) == CONST_INT
		  && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
		  && (trunc_int_for_mode (INTVAL (op2), SImode)
		      == INTVAL (op2)))
		return 1;
	      /* ??? For the kernel, we may accept adjustment of
		 -0x10000000, since we know that it will just convert
		 negative address space to positive, but perhaps this
		 is not worthwhile.  */
	      break;
	    case LABEL_REF:
	      /* These conditions are similar to SYMBOL_REF ones, just the
		 constraints for code models differ.  */
	      if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
		  && GET_CODE (op2) == CONST_INT
		  && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
		  && (trunc_int_for_mode (INTVAL (op2), SImode)
		      == INTVAL (op2)))
		return 1;
	      break;
	    default:
	      return 0;
	    }
	}
      return 0;
    default:
      return 0;
    }
}
6fca22eb
RH
3963
3964/* Value should be nonzero if functions must have frame pointers.
3965 Zero means the frame pointer need not be set up (and parms may
3966 be accessed via the stack pointer) in functions that seem suitable. */
3967
3968int
3969ix86_frame_pointer_required ()
3970{
3971 /* If we accessed previous frames, then the generated code expects
3972 to be able to access the saved ebp value in our frame. */
3973 if (cfun->machine->accesses_prev_frame)
3974 return 1;
a4f31c00 3975
6fca22eb
RH
3976 /* Several x86 os'es need a frame pointer for other reasons,
3977 usually pertaining to setjmp. */
3978 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3979 return 1;
3980
3981 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3982 the frame pointer by default. Turn it back on now if we've not
3983 got a leaf function. */
a7943381
RH
3984 if (TARGET_OMIT_LEAF_FRAME_POINTER
3985 && (!current_function_is_leaf || current_function_profile))
6fca22eb
RH
3986 return 1;
3987
3988 return 0;
3989}
3990
/* Record that the current function accesses previous call frames.
   ix86_frame_pointer_required reads this flag to force a frame pointer.  */

void
ix86_setup_frame_addresses ()
{
  cfun->machine->accesses_prev_frame = 1;
}
e075ae69 3998\f
145aacc2
RH
/* Nonzero when the PC-load thunks can be emitted as hidden, one-only
   (link-once) functions; otherwise they are emitted as plain labels in
   the text section.  Requires both gas `.hidden' support and one-only
   section support.  */
#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

/* Bitmask of registers for which a pc thunk is needed; bit N is set when
   output_set_got requested a thunk loading register N.  Read by
   ix86_asm_file_end to emit the thunk bodies.  */
static int pic_labels_used;
e9a25f70 4006
145aacc2
RH
4007/* Fills in the label name that should be used for a pc thunk for
4008 the given register. */
4009
4010static void
4011get_pc_thunk_name (name, regno)
4012 char name[32];
4013 unsigned int regno;
4014{
4015 if (USE_HIDDEN_LINKONCE)
4016 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4017 else
4018 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4019}
4020
4021
e075ae69
RH
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  Emitted at the
   end of assembly output: one thunk body per register whose bit is set
   in pic_labels_used.  */

void
ix86_asm_file_end (file)
     FILE *file;
{
  rtx xops[2];
  int regno;

  for (regno = 0; regno < 8; ++regno)
    {
      char name[32];

      /* Skip registers for which no thunk was requested.  */
      if (! ((pic_labels_used >> regno) & 1))
	continue;

      get_pc_thunk_name (name, regno);

      if (USE_HIDDEN_LINKONCE)
	{
	  /* Emit the thunk as a public, hidden, one-only function so
	     duplicate copies from other objects are merged at link time.  */
	  tree decl;

	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
			     error_mark_node);
	  TREE_PUBLIC (decl) = 1;
	  TREE_STATIC (decl) = 1;
	  DECL_ONE_ONLY (decl) = 1;

	  (*targetm.asm_out.unique_section) (decl, 0);
	  named_section (decl, NULL, 0);

	  (*targetm.asm_out.globalize_label) (file, name);
	  fputs ("\t.hidden\t", file);
	  assemble_name (file, name);
	  fputc ('\n', file);
	  ASM_DECLARE_FUNCTION_NAME (file, name, decl);
	}
      else
	{
	  /* No one-only support: just drop a local label in .text.  */
	  text_section ();
	  ASM_OUTPUT_LABEL (file, name);
	}

      /* Thunk body: load the return address (at the top of the stack)
	 into the target register, then return.  */
      xops[0] = gen_rtx_REG (SImode, regno);
      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
      output_asm_insn ("ret", xops);
    }
}
32b5b1aa 4072
/* Emit code for the SET_GOT patterns: load DEST with the PC and add the
   GOT displacement, yielding the GOT pointer.  Uses either an inline
   call/pop (or mov for non-PIC) sequence, or a call to a shared pc
   thunk when deep branch prediction is enabled.  Returns the (empty)
   assembler template string.  */

const char *
output_set_got (dest)
     rtx dest;
{
  rtx xops[3];

  xops[0] = dest;
  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
    {
      /* Inline sequence: call over a local label and pop the return
	 address (PIC), or load the label address directly (non-PIC).  */
      xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());

      if (!flag_pic)
	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      else
	output_asm_insn ("call\t%a2", xops);

#if TARGET_MACHO
      /* Output the "canonical" label name ("Lxx$pb") here too.  This
         is what will be referred to by the Mach-O PIC subsystem.  */
      ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
#endif
      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
				 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));

      if (flag_pic)
	output_asm_insn ("pop{l}\t%0", xops);
    }
  else
    {
      /* Deep-branch-prediction PIC: call the per-register pc thunk and
	 record that its body must be emitted at end of file.  */
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);
    }

  /* Finally add the GOT displacement to the loaded PC.  */
  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
  else if (!TARGET_MACHO)
    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);

  return "";
}
8dfe5673 4122
0d7d98ee 4123/* Generate an "push" pattern for input ARG. */
e9a25f70 4124
e075ae69
RH
4125static rtx
4126gen_push (arg)
4127 rtx arg;
e9a25f70 4128{
c5c76735 4129 return gen_rtx_SET (VOIDmode,
0d7d98ee
JH
4130 gen_rtx_MEM (Pmode,
4131 gen_rtx_PRE_DEC (Pmode,
c5c76735
JL
4132 stack_pointer_rtx)),
4133 arg);
e9a25f70
JL
4134}
4135
bd09bdeb
RH
4136/* Return >= 0 if there is an unused call-clobbered register available
4137 for the entire function. */
4138
4139static unsigned int
4140ix86_select_alt_pic_regnum ()
4141{
4142 if (current_function_is_leaf && !current_function_profile)
4143 {
4144 int i;
4145 for (i = 2; i >= 0; --i)
4146 if (!regs_ever_live[i])
4147 return i;
4148 }
4149
4150 return INVALID_REGNUM;
4151}
fce5a9f2 4152
4dd2ac2c
JH
/* Return 1 if we need to save REGNO in the prologue.  MAYBE_EH_RETURN
   additionally counts the EH return data registers as needing a save
   (used on paths that must cater for an eh_return landing).  */

static int
ix86_save_reg (regno, maybe_eh_return)
     unsigned int regno;
     int maybe_eh_return;
{
  /* The PIC register must be saved whenever it is live (or the function
     is profiled / uses eh_return) -- unless an unused call-clobbered
     register can stand in for it for the whole function.  */
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile
	  || current_function_calls_eh_return))
    {
      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
	return 0;
      return 1;
    }

  /* EH return data registers (terminated by INVALID_REGNUM) must be
     preserved when requested.  */
  if (current_function_calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == regno)
	    return 1;
	}
    }

  /* Otherwise: save live, callee-saved, non-fixed registers; the hard
     frame pointer is handled separately when a frame pointer is used.  */
  return (regs_ever_live[regno]
	  && !call_used_regs[regno]
	  && !fixed_regs[regno]
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
4188
0903fcab
JH
4189/* Return number of registers to be saved on the stack. */
4190
4191static int
4192ix86_nsaved_regs ()
4193{
4194 int nregs = 0;
0903fcab
JH
4195 int regno;
4196
4dd2ac2c 4197 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4198 if (ix86_save_reg (regno, true))
4dd2ac2c 4199 nregs++;
0903fcab
JH
4200 return nregs;
4201}
4202
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  Valid (FROM, TO) pairs are
   arg/frame pointer -> hard frame pointer or stack pointer; any other
   combination aborts.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (from, to)
     int from;
     int to;
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      /* Only elimination to the stack pointer remains.  */
      if (to != STACK_POINTER_REGNUM)
	abort ();
      else if (from == ARG_POINTER_REGNUM)
	return frame.stack_pointer_offset;
      else if (from != FRAME_POINTER_REGNUM)
	abort ();
      else
	return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}
4231
4dd2ac2c 4232/* Fill structure ix86_frame about frame of currently computed function. */
0f290768 4233
4dd2ac2c
JH
4234static void
4235ix86_compute_frame_layout (frame)
4236 struct ix86_frame *frame;
65954bd8 4237{
65954bd8 4238 HOST_WIDE_INT total_size;
564d80f4 4239 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
44affdae
JH
4240 int offset;
4241 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4dd2ac2c 4242 HOST_WIDE_INT size = get_frame_size ();
65954bd8 4243
4dd2ac2c 4244 frame->nregs = ix86_nsaved_regs ();
564d80f4 4245 total_size = size;
65954bd8 4246
9ba81eaa 4247 /* Skip return address and saved base pointer. */
4dd2ac2c
JH
4248 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4249
4250 frame->hard_frame_pointer_offset = offset;
564d80f4 4251
fcbfaa65
RK
4252 /* Do some sanity checking of stack_alignment_needed and
4253 preferred_alignment, since i386 port is the only using those features
f710504c 4254 that may break easily. */
564d80f4 4255
44affdae
JH
4256 if (size && !stack_alignment_needed)
4257 abort ();
44affdae
JH
4258 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4259 abort ();
4260 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4261 abort ();
4262 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4263 abort ();
564d80f4 4264
4dd2ac2c
JH
4265 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4266 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
564d80f4 4267
4dd2ac2c
JH
4268 /* Register save area */
4269 offset += frame->nregs * UNITS_PER_WORD;
65954bd8 4270
8362f420
JH
4271 /* Va-arg area */
4272 if (ix86_save_varrargs_registers)
4273 {
4274 offset += X86_64_VARARGS_SIZE;
4275 frame->va_arg_size = X86_64_VARARGS_SIZE;
4276 }
4277 else
4278 frame->va_arg_size = 0;
4279
4dd2ac2c
JH
4280 /* Align start of frame for local function. */
4281 frame->padding1 = ((offset + stack_alignment_needed - 1)
4282 & -stack_alignment_needed) - offset;
f73ad30e 4283
4dd2ac2c 4284 offset += frame->padding1;
65954bd8 4285
4dd2ac2c
JH
4286 /* Frame pointer points here. */
4287 frame->frame_pointer_offset = offset;
54ff41b7 4288
4dd2ac2c 4289 offset += size;
65954bd8 4290
0b7ae565
RH
4291 /* Add outgoing arguments area. Can be skipped if we eliminated
4292 all the function calls as dead code. */
4293 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4dd2ac2c
JH
4294 {
4295 offset += current_function_outgoing_args_size;
4296 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4297 }
4298 else
4299 frame->outgoing_arguments_size = 0;
564d80f4 4300
002ff5bc
RH
4301 /* Align stack boundary. Only needed if we're calling another function
4302 or using alloca. */
4303 if (!current_function_is_leaf || current_function_calls_alloca)
0b7ae565
RH
4304 frame->padding2 = ((offset + preferred_alignment - 1)
4305 & -preferred_alignment) - offset;
4306 else
4307 frame->padding2 = 0;
4dd2ac2c
JH
4308
4309 offset += frame->padding2;
4310
4311 /* We've reached end of stack frame. */
4312 frame->stack_pointer_offset = offset;
4313
4314 /* Size prologue needs to allocate. */
4315 frame->to_allocate =
4316 (size + frame->padding1 + frame->padding2
8362f420 4317 + frame->outgoing_arguments_size + frame->va_arg_size);
4dd2ac2c 4318
8362f420
JH
4319 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4320 && current_function_is_leaf)
4321 {
4322 frame->red_zone_size = frame->to_allocate;
4323 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4324 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4325 }
4326 else
4327 frame->red_zone_size = 0;
4328 frame->to_allocate -= frame->red_zone_size;
4329 frame->stack_pointer_offset -= frame->red_zone_size;
4dd2ac2c
JH
4330#if 0
4331 fprintf (stderr, "nregs: %i\n", frame->nregs);
4332 fprintf (stderr, "size: %i\n", size);
4333 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4334 fprintf (stderr, "padding1: %i\n", frame->padding1);
8362f420 4335 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4dd2ac2c
JH
4336 fprintf (stderr, "padding2: %i\n", frame->padding2);
4337 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
8362f420 4338 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4dd2ac2c
JH
4339 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4340 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4341 frame->hard_frame_pointer_offset);
4342 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4343#endif
65954bd8
JL
4344}
4345
0903fcab
JH
4346/* Emit code to save registers in the prologue. */
4347
4348static void
4349ix86_emit_save_regs ()
4350{
4351 register int regno;
0903fcab 4352 rtx insn;
0903fcab 4353
4dd2ac2c 4354 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4355 if (ix86_save_reg (regno, true))
0903fcab 4356 {
0d7d98ee 4357 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
0903fcab
JH
4358 RTX_FRAME_RELATED_P (insn) = 1;
4359 }
4360}
4361
c6036a37
JH
/* Emit code to save registers using MOV insns.  First register
   is restored from POINTER + OFFSET; subsequent registers go to
   successive word slots.  Each store is marked frame-related.  */

static void
ix86_emit_save_regs_using_mov (pointer, offset)
     rtx pointer;
     HOST_WIDE_INT offset;
{
  int regno;
  rtx insn;

  /* Ascending order: must mirror the slot layout the epilogue's
     mov-based restore assumes.  */
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, true))
      {
	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
					       Pmode, offset),
			       gen_rtx_REG (Pmode, regno));
	RTX_FRAME_RELATED_P (insn) = 1;
	offset += UNITS_PER_WORD;
      }
}
4382
/* Expand the prologue into a bunch of separate insns: save ebp and
   set up the frame pointer (if needed), save the callee-saved
   registers (via pushes or movs), allocate the frame, and load the
   PIC register when required.  */

void
ix86_expand_prologue ()
{
  rtx insn;
  bool pic_reg_used;
  struct ix86_frame frame;
  int use_mov = 0;
  HOST_WIDE_INT allocate;

  /* When not optimizing for size, prefer the mov-based register save
     for small functions (cheaper than a push sequence on some chips).  */
  if (!optimize_size)
    {
      use_fast_prologue_epilogue
	= !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
      if (TARGET_PROLOGUE_USING_MOVE)
	use_mov = use_fast_prologue_epilogue;
    }
  ix86_compute_frame_layout (&frame);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;
  /* In case we are dealing only with single register and empty frame,
     push is equivalent of the mov+add sequence.  */
  if (allocate == 0 && frame.nregs <= 1)
    use_mov = 0;

  if (!use_mov)
    ix86_emit_save_regs ();
  else
    /* With movs, the register save area is part of the allocation.  */
    allocate += frame.nregs * UNITS_PER_WORD;

  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    {
      insn = emit_insn (gen_pro_epilogue_adjust_stack
			(stack_pointer_rtx, stack_pointer_rtx,
			 GEN_INT (-allocate)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* Large allocation with stack probing: call _alloca with the size
	 in %eax so every page gets touched.
	 ??? Is this only valid for Win32?  */

      rtx arg0, sym;

      if (TARGET_64BIT)
	abort ();

      arg0 = gen_rtx_REG (SImode, 0);
      emit_move_insn (arg0, GEN_INT (allocate));

      sym = gen_rtx_MEM (FUNCTION_MODE,
			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
      insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));

      CALL_INSN_FUNCTION_USAGE (insn)
	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
			     CALL_INSN_FUNCTION_USAGE (insn));
    }
  if (use_mov)
    {
      /* Store registers relative to esp after allocation, or relative
	 to ebp when a frame pointer plus locals exist.  */
      if (!frame_pointer_needed || !frame.to_allocate)
	ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
	ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
				       -frame.nregs * UNITS_PER_WORD);
    }

#ifdef SUBTARGET_PROLOGUE
  SUBTARGET_PROLOGUE;
#endif

  /* Decide whether the PIC register must be loaded; when possible,
     retarget it to an unused call-clobbered register.  */
  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      insn = emit_insn (gen_set_got (pic_offset_table_rtx));

      /* Even with accurate pre-reload life analysis, we can wind up
	 deleting all references to the pic register after reload.
	 Consider if cross-jumping unifies two sides of a branch
	 controled by a comparison vs the only read from a global.
	 In which case, allow the set_got to be deleted, though we're
	 too late to do anything about the ebx save in the prologue.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
    }

  /* Prevent function calls from be scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the got load isn't deleted.  */
  if (current_function_profile)
    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
}
4499
da2d1d3a
JH
4500/* Emit code to restore saved registers using MOV insns. First register
4501 is restored from POINTER + OFFSET. */
4502static void
1020a5ab
RH
4503ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4504 rtx pointer;
4505 int offset;
37a58036 4506 int maybe_eh_return;
da2d1d3a
JH
4507{
4508 int regno;
da2d1d3a 4509
4dd2ac2c 4510 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 4511 if (ix86_save_reg (regno, maybe_eh_return))
da2d1d3a 4512 {
4dd2ac2c 4513 emit_move_insn (gen_rtx_REG (Pmode, regno),
b72f00af
RK
4514 adjust_address (gen_rtx_MEM (Pmode, pointer),
4515 Pmode, offset));
4dd2ac2c 4516 offset += UNITS_PER_WORD;
da2d1d3a
JH
4517 }
4518}
4519
0f290768 4520/* Restore function stack, frame, and registers. */
e9a25f70 4521
2a2ab3f9 4522void
1020a5ab
RH
4523ix86_expand_epilogue (style)
4524 int style;
2a2ab3f9 4525{
1c71e60e 4526 int regno;
fdb8a883 4527 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4dd2ac2c 4528 struct ix86_frame frame;
65954bd8 4529 HOST_WIDE_INT offset;
4dd2ac2c
JH
4530
4531 ix86_compute_frame_layout (&frame);
2a2ab3f9 4532
a4f31c00 4533 /* Calculate start of saved registers relative to ebp. Special care
84e306b4
RH
4534 must be taken for the normal return case of a function using
4535 eh_return: the eax and edx registers are marked as saved, but not
4536 restored along this path. */
4537 offset = frame.nregs;
4538 if (current_function_calls_eh_return && style != 2)
4539 offset -= 2;
4540 offset *= -UNITS_PER_WORD;
2a2ab3f9 4541
fdb8a883
JW
4542 /* If we're only restoring one register and sp is not valid then
4543 using a move instruction to restore the register since it's
0f290768 4544 less work than reloading sp and popping the register.
da2d1d3a
JH
4545
4546 The default code result in stack adjustment using add/lea instruction,
4547 while this code results in LEAVE instruction (or discrete equivalent),
4548 so it is profitable in some other cases as well. Especially when there
4549 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4550 and there is exactly one register to pop. This heruistic may need some
4551 tuning in future. */
4dd2ac2c 4552 if ((!sp_valid && frame.nregs <= 1)
2ab0437e 4553 || (TARGET_EPILOGUE_USING_MOVE
6ab16dd9 4554 && use_fast_prologue_epilogue
c6036a37 4555 && (frame.nregs > 1 || frame.to_allocate))
4dd2ac2c 4556 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
2ab0437e 4557 || (frame_pointer_needed && TARGET_USE_LEAVE
6ab16dd9 4558 && use_fast_prologue_epilogue && frame.nregs == 1)
2ab0437e 4559 || current_function_calls_eh_return)
2a2ab3f9 4560 {
da2d1d3a
JH
4561 /* Restore registers. We can use ebp or esp to address the memory
4562 locations. If both are available, default to ebp, since offsets
4563 are known to be small. Only exception is esp pointing directly to the
4564 end of block of saved registers, where we may simplify addressing
4565 mode. */
4566
4dd2ac2c 4567 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
1020a5ab
RH
4568 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4569 frame.to_allocate, style == 2);
da2d1d3a 4570 else
1020a5ab
RH
4571 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4572 offset, style == 2);
4573
4574 /* eh_return epilogues need %ecx added to the stack pointer. */
4575 if (style == 2)
4576 {
4577 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
2a2ab3f9 4578
1020a5ab
RH
4579 if (frame_pointer_needed)
4580 {
4581 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4582 tmp = plus_constant (tmp, UNITS_PER_WORD);
4583 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4584
4585 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4586 emit_move_insn (hard_frame_pointer_rtx, tmp);
4587
4588 emit_insn (gen_pro_epilogue_adjust_stack
f2042df3 4589 (stack_pointer_rtx, sa, const0_rtx));
1020a5ab
RH
4590 }
4591 else
4592 {
4593 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4594 tmp = plus_constant (tmp, (frame.to_allocate
4595 + frame.nregs * UNITS_PER_WORD));
4596 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4597 }
4598 }
4599 else if (!frame_pointer_needed)
f2042df3
RH
4600 emit_insn (gen_pro_epilogue_adjust_stack
4601 (stack_pointer_rtx, stack_pointer_rtx,
4602 GEN_INT (frame.to_allocate
4603 + frame.nregs * UNITS_PER_WORD)));
0f290768 4604 /* If not an i386, mov & pop is faster than "leave". */
6ab16dd9 4605 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
8362f420 4606 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
c8c5cb99 4607 else
2a2ab3f9 4608 {
1c71e60e
JH
4609 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4610 hard_frame_pointer_rtx,
f2042df3 4611 const0_rtx));
8362f420
JH
4612 if (TARGET_64BIT)
4613 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4614 else
4615 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
e9a25f70
JL
4616 }
4617 }
1c71e60e 4618 else
68f654ec 4619 {
1c71e60e
JH
4620 /* First step is to deallocate the stack frame so that we can
4621 pop the registers. */
4622 if (!sp_valid)
4623 {
4624 if (!frame_pointer_needed)
4625 abort ();
4626 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4627 hard_frame_pointer_rtx,
f2042df3 4628 GEN_INT (offset)));
1c71e60e 4629 }
4dd2ac2c 4630 else if (frame.to_allocate)
f2042df3
RH
4631 emit_insn (gen_pro_epilogue_adjust_stack
4632 (stack_pointer_rtx, stack_pointer_rtx,
4633 GEN_INT (frame.to_allocate)));
1c71e60e 4634
4dd2ac2c 4635 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 4636 if (ix86_save_reg (regno, false))
8362f420
JH
4637 {
4638 if (TARGET_64BIT)
4639 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4640 else
4641 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4642 }
4dd2ac2c 4643 if (frame_pointer_needed)
8362f420 4644 {
f5143c46 4645 /* Leave results in shorter dependency chains on CPUs that are
2ab0437e
JH
4646 able to grok it fast. */
4647 if (TARGET_USE_LEAVE)
4648 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4649 else if (TARGET_64BIT)
8362f420
JH
4650 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4651 else
4652 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4653 }
68f654ec 4654 }
68f654ec 4655
cbbf65e0 4656 /* Sibcall epilogues don't want a return instruction. */
1020a5ab 4657 if (style == 0)
cbbf65e0
RH
4658 return;
4659
2a2ab3f9
JVA
4660 if (current_function_pops_args && current_function_args_size)
4661 {
e075ae69 4662 rtx popc = GEN_INT (current_function_pops_args);
2a2ab3f9 4663
b8c752c8
UD
4664 /* i386 can only pop 64K bytes. If asked to pop more, pop
4665 return address, do explicit add, and jump indirectly to the
0f290768 4666 caller. */
2a2ab3f9 4667
b8c752c8 4668 if (current_function_pops_args >= 65536)
2a2ab3f9 4669 {
e075ae69 4670 rtx ecx = gen_rtx_REG (SImode, 2);
e9a25f70 4671
8362f420
JH
4672 /* There are is no "pascal" calling convention in 64bit ABI. */
4673 if (TARGET_64BIT)
b531087a 4674 abort ();
8362f420 4675
e075ae69
RH
4676 emit_insn (gen_popsi1 (ecx));
4677 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
11837777 4678 emit_jump_insn (gen_return_indirect_internal (ecx));
e9a25f70 4679 }
79325812 4680 else
e075ae69
RH
4681 emit_jump_insn (gen_return_pop_internal (popc));
4682 }
4683 else
4684 emit_jump_insn (gen_return_internal ());
4685}
bd09bdeb
RH
4686
4687/* Reset from the function's potential modifications. */
4688
4689static void
4690ix86_output_function_epilogue (file, size)
4691 FILE *file ATTRIBUTE_UNUSED;
4692 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4693{
4694 if (pic_offset_table_rtx)
4695 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4696}
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Fill *OUT with the base, index, displacement and
   scale found.  Return 0 if the structure of the address is grossly off
   (*OUT is then left unmodified).  Return -1 if the address contains
   ASHIFT, so it is not strictly valid, but still used for computing
   length of lea instruction.  */

static int
ix86_decompose_address (addr, out)
     register rtx addr;
     struct ix86_address *out;
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  int retval = 1;

  /* Classify ADDR by its outermost code.  Only canonical shapes are
     recognized; anything else falls out as "whole thing is a disp".  */
  if (REG_P (addr) || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == SUBREG)
	{
	  if (code1 == REG || code1 == SUBREG)
	    index = op0, base = op1;	/* index + base */
	  else
	    base = op0, disp = op1;	/* base + displacement */
	}
      else if (code0 == MULT)
	{
	  index = XEXP (op0, 0);
	  scale_rtx = XEXP (op0, 1);
	  if (code1 == REG || code1 == SUBREG)
	    base = op1;			/* index*scale + base */
	  else
	    disp = op1;			/* index*scale + disp */
	}
      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
	{
	  index = XEXP (XEXP (op0, 0), 0);	/* index*scale + base + disp */
	  scale_rtx = XEXP (XEXP (op0, 0), 1);
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else if (code0 == PLUS)
	{
	  index = XEXP (op0, 0);	/* index + base + disp */
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else
	return 0;
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.
         Only shift counts 0..3 map onto the 1/2/4/8 scale encodings.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      retval = -1;			/* Usable for lea length only.  */
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return 0;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is not scaling.
     Swapping base and index lets such addresses be encoded, since these
     registers cannot appear in the index position of a ModR/M byte.  */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx || index == frame_pointer_rtx
	  || index == stack_pointer_rtx))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  /* All checks passed; publish the decomposition.  */
  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;

  return retval;
}
/* Return cost of the memory address X.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
int
ix86_address_cost (x)
     rtx x;
{
  struct ix86_address parts;
  int cost = 1;

  /* X must already be a canonical address; a failed decomposition is
     a caller bug.  */
  if (!ix86_decompose_address (x, &parts))
    abort ();

  /* Look through SUBREGs so the REGNO checks below see the real regs.  */
  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* More complex memory references are better.  */
  if (parts.disp && parts.disp != const0_rtx)
    cost--;

  /* Attempt to minimize number of registers in the address:  penalize
     any part that is not yet a hard register (pseudos tie up regs).  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  /* Extra penalty when two distinct pseudos are both live in the address.  */
  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     Following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the zero in
     memory address, but I don't have AMD-K6 machine handy to check this
     theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
4890\f
b949ea8b
JW
4891/* If X is a machine specific address (i.e. a symbol or label being
4892 referenced as a displacement from the GOT implemented using an
4893 UNSPEC), then return the base term. Otherwise return X. */
4894
4895rtx
4896ix86_find_base_term (x)
4897 rtx x;
4898{
4899 rtx term;
4900
6eb791fc
JH
4901 if (TARGET_64BIT)
4902 {
4903 if (GET_CODE (x) != CONST)
4904 return x;
4905 term = XEXP (x, 0);
4906 if (GET_CODE (term) == PLUS
4907 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4908 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4909 term = XEXP (term, 0);
4910 if (GET_CODE (term) != UNSPEC
8ee41eaf 4911 || XINT (term, 1) != UNSPEC_GOTPCREL)
6eb791fc
JH
4912 return x;
4913
4914 term = XVECEXP (term, 0, 0);
4915
4916 if (GET_CODE (term) != SYMBOL_REF
4917 && GET_CODE (term) != LABEL_REF)
4918 return x;
4919
4920 return term;
4921 }
4922
b949ea8b
JW
4923 if (GET_CODE (x) != PLUS
4924 || XEXP (x, 0) != pic_offset_table_rtx
4925 || GET_CODE (XEXP (x, 1)) != CONST)
4926 return x;
4927
4928 term = XEXP (XEXP (x, 1), 0);
4929
4930 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4931 term = XEXP (term, 0);
4932
4933 if (GET_CODE (term) != UNSPEC
8ee41eaf 4934 || XINT (term, 1) != UNSPEC_GOTOFF)
b949ea8b
JW
4935 return x;
4936
4937 term = XVECEXP (term, 0, 0);
4938
4939 if (GET_CODE (term) != SYMBOL_REF
4940 && GET_CODE (term) != LABEL_REF)
4941 return x;
4942
4943 return term;
4944}
4945\f
f996902d
RH
4946/* Determine if a given RTX is a valid constant. We already know this
4947 satisfies CONSTANT_P. */
4948
4949bool
4950legitimate_constant_p (x)
4951 rtx x;
4952{
4953 rtx inner;
4954
4955 switch (GET_CODE (x))
4956 {
4957 case SYMBOL_REF:
4958 /* TLS symbols are not constant. */
4959 if (tls_symbolic_operand (x, Pmode))
4960 return false;
4961 break;
4962
4963 case CONST:
4964 inner = XEXP (x, 0);
4965
4966 /* Offsets of TLS symbols are never valid.
4967 Discourage CSE from creating them. */
4968 if (GET_CODE (inner) == PLUS
4969 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
4970 return false;
4971
4972 /* Only some unspecs are valid as "constants". */
4973 if (GET_CODE (inner) == UNSPEC)
4974 switch (XINT (inner, 1))
4975 {
4976 case UNSPEC_TPOFF:
4977 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
f996902d
RH
4978 default:
4979 return false;
4980 }
4981 break;
4982
4983 default:
4984 break;
4985 }
4986
4987 /* Otherwise we handle everything else in the move patterns. */
4988 return true;
4989}
4990
4991/* Determine if a given RTX is a valid constant address. */
4992
4993bool
4994constant_address_p (x)
4995 rtx x;
4996{
4997 switch (GET_CODE (x))
4998 {
4999 case LABEL_REF:
5000 case CONST_INT:
5001 return true;
5002
5003 case CONST_DOUBLE:
5004 return TARGET_64BIT;
5005
5006 case CONST:
b069de3b
SS
5007 /* For Mach-O, really believe the CONST. */
5008 if (TARGET_MACHO)
5009 return true;
5010 /* Otherwise fall through. */
f996902d
RH
5011 case SYMBOL_REF:
5012 return !flag_pic && legitimate_constant_p (x);
5013
5014 default:
5015 return false;
5016 }
5017}
5018
5019/* Nonzero if the constant value X is a legitimate general operand
fce5a9f2 5020 when generating PIC code. It is given that flag_pic is on and
f996902d
RH
5021 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5022
5023bool
5024legitimate_pic_operand_p (x)
5025 rtx x;
5026{
5027 rtx inner;
5028
5029 switch (GET_CODE (x))
5030 {
5031 case CONST:
5032 inner = XEXP (x, 0);
5033
5034 /* Only some unspecs are valid as "constants". */
5035 if (GET_CODE (inner) == UNSPEC)
5036 switch (XINT (inner, 1))
5037 {
5038 case UNSPEC_TPOFF:
5039 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
f996902d
RH
5040 default:
5041 return false;
5042 }
5043 /* FALLTHRU */
5044
5045 case SYMBOL_REF:
5046 case LABEL_REF:
5047 return legitimate_pic_address_disp_p (x);
5048
5049 default:
5050 return true;
5051 }
5052}
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  Returns nonzero (true) when DISP may be used as the
   displacement part of an address.  */

int
legitimate_pic_address_disp_p (disp)
     register rtx disp;
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx x = disp;
      if (GET_CODE (disp) == CONST)
	x = XEXP (disp, 0);
      /* ??? Handle PIC code models */
      /* Accept sym+offset if the offset stays within +-1GB under the
	 small PIC code model.  */
      if (GET_CODE (x) == PLUS
	  && (GET_CODE (XEXP (x, 1)) == CONST_INT
	      && ix86_cmodel == CM_SMALL_PIC
	      && INTVAL (XEXP (x, 1)) < 1024*1024*1024
	      && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
	x = XEXP (x, 0);
      if (local_symbolic_operand (x, Pmode))
	return 1;
    }

  /* Everything below requires the displacement to be wrapped in CONST.  */
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* We are unsafe to allow PLUS expressions.  This limit allowed distance
         of GOT tables.  We should not need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || XINT (disp, 1) != UNSPEC_GOTPCREL)
	return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return 0;
      return 1;
    }

  /* 32-bit mode: strip an optional integer offset, remembering that we
     saw one -- some unspec kinds below reject offsets.  */
  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
	return 0;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O.  */
  if (TARGET_MACHO && GET_CODE (disp) == MINUS)
    {
      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
	  || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
	if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
	  {
	    const char *sym_name = XSTR (XEXP (disp, 1), 0);
	    /* The pic-base symbol is recognized by its "$pb" suffix.  */
	    if (strstr (sym_name, "$pb") != 0)
	      return 1;
	  }
    }

  if (GET_CODE (disp) != UNSPEC)
    return 0;

  /* Dispatch on the kind of GOT/TLS reference.  */
  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
	return false;
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
    case UNSPEC_GOTOFF:
      return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      /* "Initial exec" TLS references never carry an offset.  */
      if (saw_plus)
	return false;
      return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_NTPOFF:
      return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_DTPOFF:
      return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    }

  return 0;
}
/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.  STRICT nonzero
   means hard-register constraints are enforced (see the
   REG_OK_FOR_*_STRICT_P checks below); returns TRUE or FALSE.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

int
legitimate_address_p (mode, addr, strict)
     enum machine_mode mode;
     register rtx addr;
     int strict;
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	       GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  /* A bare thread-pointer unspec is always a valid address.  */
  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
    {
      if (TARGET_DEBUG_ADDR)
	fprintf (stderr, "Success.\n");
      return TRUE;
    }

  /* Note: -1 (the "lea only" result) is also rejected here.  */
  if (ix86_decompose_address (addr, &parts) <= 0)
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's here, it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      rtx reg;
      reason_rtx = base;

      if (GET_CODE (base) == SUBREG)
	reg = SUBREG_REG (base);
      else
	reg = base;

      if (GET_CODE (reg) != REG)
	{
	  reason = "base is not a register";
	  goto report_error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
	{
	  reason = "base is not valid";
	  goto report_error;
	}
    }

  /* Validate index register.

     Don't allow SUBREG's here, it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      rtx reg;
      reason_rtx = index;

      if (GET_CODE (index) == SUBREG)
	reg = SUBREG_REG (index);
      else
	reg = index;

      if (GET_CODE (reg) != REG)
	{
	  reason = "index is not a register";
	  goto report_error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
	{
	  reason = "index is not valid";
	  goto report_error;
	}
    }

  /* Validate scale factor: only 1, 2, 4 and 8 are encodable, and a
     scale is meaningless without an index register.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto report_error;
	}

      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto report_error;
	}
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (TARGET_64BIT)
	{
	  /* 64-bit addressing only allows sign-extended 32-bit disps.  */
	  if (!x86_64_sign_extended_value (disp))
	    {
	      reason = "displacement is out of range";
	      goto report_error;
	    }
	}
      else
	{
	  if (GET_CODE (disp) == CONST_DOUBLE)
	    {
	      reason = "displacement is a const_double";
	      goto report_error;
	    }
	}

      /* A CONST-wrapped unspec must be one of our own GOT/TLS kinds.  */
      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_GOTPCREL:
	    if (!flag_pic)
	      abort ();
	    goto is_legitimate_pic;

	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	    break;

	  default:
	    reason = "invalid address unspec";
	    goto report_error;
	  }

      else if (flag_pic && (SYMBOLIC_CONST (disp)
#if TARGET_MACHO
			    && !machopic_operand_p (disp)
#endif
			    ))
	{
	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      reason = "non-constant pic memory reference";
	      goto report_error;
	    }
	  if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto report_error;
	    }

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
	         return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (!CONSTANT_ADDRESS_P (disp))
	{
	  reason = "displacement is not constant";
	  goto report_error;
	}
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

 report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
3b3c6a3f 5386\f
55efb413
JW
5387/* Return an unique alias set for the GOT. */
5388
0f290768 5389static HOST_WIDE_INT
55efb413
JW
5390ix86_GOT_alias_set ()
5391{
5bf0ebab
RH
5392 static HOST_WIDE_INT set = -1;
5393 if (set == -1)
5394 set = new_alias_set ();
5395 return set;
0f290768 5396}
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (orig, reg)
     rtx orig;
     rtx reg;
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

#if TARGET_MACHO
  if (reg == 0)
    reg = gen_reg_rtx (Pmode);
  /* Use the generic Mach-O PIC machinery.  */
  return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
#endif

  if (local_symbolic_operand (addr, Pmode))
    {
      /* In 64bit mode we can address such objects directly.  */
      if (TARGET_64BIT)
	new = addr;
      else
	{
	  /* This symbol may be referenced via a displacement from the PIC
	     base address (@GOTOFF).  */

	  /* During reload no new pseudos may be created, but the PIC
	     register must be marked live so reload preserves it.  */
	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

	  if (reg != 0)
	    {
	      emit_move_insn (reg, new);
	      new = reg;
	    }
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      if (TARGET_64BIT)
	{
	  /* Build a RIP-relative GOT load: (mem (const (unspec
	     [sym] GOTPCREL))).  */
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  /* Use directly gen_movsi, otherwise the address is loaded
	     into register for CSE.  We don't want to CSE this addresses,
	     instead we CSE addresses from the GOT table, so skip this.  */
	  emit_insn (gen_movsi (reg, new));
	  new = reg;
	}
      else
	{
	  /* This symbol must be referenced via a load from the
	     Global Offset Table (@GOT).  */

	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else
    {
      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway...  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  if (GET_CODE (addr) != PLUS)
	    abort ();
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if (local_symbolic_operand (op0, Pmode)
	      && GET_CODE (op1) == CONST_INT)
	    {
	      if (!TARGET_64BIT)
		{
		  if (reload_in_progress)
		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					UNSPEC_GOTOFF);
		  new = gen_rtx_PLUS (Pmode, new, op1);
		  new = gen_rtx_CONST (Pmode, new);
		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

		  if (reg != 0)
		    {
		      emit_move_insn (reg, new);
		      new = reg;
		    }
		}
	      else
		{
		  /* ??? We need to limit offsets here.  */
		}
	    }
	  else
	    {
	      /* Legitimize both operands recursively, then recombine.
		 REG is reused for the first operand only; the second
		 recursion must not clobber it.  */
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new  = legitimize_pic_address (XEXP (addr, 1),
					     base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  /* Keep the constant part of a PLUS outermost.  */
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
		}
	    }
	}
    }
  return new;
}
/* Record target-specific information about DECL in its SYMBOL_REF.
   FIRST is unused here.  Two encodings are applied: the PIC locality
   flag, and a "%<char>" prefix identifying the TLS access model.  */

static void
ix86_encode_section_info (decl, first)
     tree decl;
     int first ATTRIBUTE_UNUSED;
{
  bool local_p = (*targetm.binds_local_p) (decl);
  rtx rtl, symbol;

  /* Only declarations whose RTL is a (mem (symbol_ref ...)) carry an
     encodable symbol.  */
  rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
  if (GET_CODE (rtl) != MEM)
    return;
  symbol = XEXP (rtl, 0);
  if (GET_CODE (symbol) != SYMBOL_REF)
    return;

  /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
     symbol so that we may access it directly in the GOT.  */

  if (flag_pic)
    SYMBOL_REF_FLAG (symbol) = local_p;

  /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
     "local dynamic", "initial exec" or "local exec" TLS models
     respectively.  */

  if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
    {
      const char *symbol_str;
      char *newstr;
      size_t len;
      enum tls_model kind = decl_tls_model (decl);

      symbol_str = XSTR (symbol, 0);

      /* If already prefixed with the right model character, nothing to
	 do; otherwise strip the stale prefix before re-encoding.  */
      if (symbol_str[0] == '%')
	{
	  if (symbol_str[1] == tls_model_chars[kind])
	    return;
	  symbol_str += 2;
	}
      /* Build "%<model><name>" in a temporary and intern it in GC
	 storage (len includes the terminating NUL).  */
      len = strlen (symbol_str) + 1;
      newstr = alloca (len + 2);

      newstr[0] = '%';
      newstr[1] = tls_model_chars[kind];
      memcpy (newstr + 2, symbol_str, len);

      XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
    }
}
/* Undo the above when printing symbol names: skip a leading two-byte
   "%<tls-model>" encoding and a leading '*' user-label marker.  */

static const char *
ix86_strip_name_encoding (str)
     const char *str;
{
  const char *name = str;

  if (name[0] == '%')
    name += 2;		/* "%<model-char>" TLS prefix.  */
  if (name[0] == '*')
    name += 1;		/* Assembler no-prefix marker.  */

  return name;
}
3b3c6a3f 5625\f
f996902d
RH
5626/* Load the thread pointer into a register. */
5627
5628static rtx
5629get_thread_pointer ()
5630{
5631 rtx tp;
5632
5633 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
9e20be0c
JJ
5634 tp = gen_rtx_MEM (Pmode, tp);
5635 RTX_UNCHANGING_P (tp) = 1;
5636 set_mem_alias_set (tp, ix86_GOT_alias_set ());
f996902d
RH
5637 tp = force_reg (Pmode, tp);
5638
5639 return tp;
5640}
fce5a9f2 5641
3b3c6a3f
MM
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

rtx
legitimize_address (x, oldx, mode)
     register rtx x;
     register rtx oldx ATTRIBUTE_UNUSED;
     enum machine_mode mode;
{
  int changed = 0;
  /* Reused for two purposes: first the TLS model of X (if any), later
     the log2 of a shift count when canonicalizing shifts to multiplies.  */
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
	       GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  /* TLS symbols need model-specific access sequences; handle them first.  */
  log = tls_symbolic_operand (x, mode);
  if (log)
    {
      rtx dest, base, off, pic;

      switch (log)
	{
	case TLS_MODEL_GLOBAL_DYNAMIC:
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_tls_global_dynamic (dest, x));
	  break;

	case TLS_MODEL_LOCAL_DYNAMIC:
	  base = gen_reg_rtx (Pmode);
	  emit_insn (gen_tls_local_dynamic_base (base));

	  /* Address is module base plus a DTP-relative offset.  */
	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
	  off = gen_rtx_CONST (Pmode, off);

	  return gen_rtx_PLUS (Pmode, base, off);

	case TLS_MODEL_INITIAL_EXEC:
	  if (flag_pic)
	    {
	      if (reload_in_progress)
		regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	      pic = pic_offset_table_rtx;
	    }
	  else if (!TARGET_GNU_TLS)
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	  else
	    pic = NULL;

	  base = get_thread_pointer ();

	  /* Pick the relocation matching the TLS dialect and PIC-ness.  */
	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
				!TARGET_GNU_TLS
				? UNSPEC_GOTTPOFF
				: flag_pic ? UNSPEC_GOTNTPOFF
					   : UNSPEC_INDNTPOFF);
	  off = gen_rtx_CONST (Pmode, off);
	  if (flag_pic || !TARGET_GNU_TLS)
	    off = gen_rtx_PLUS (Pmode, pic, off);
	  off = gen_rtx_MEM (Pmode, off);
	  RTX_UNCHANGING_P (off) = 1;
	  set_mem_alias_set (off, ix86_GOT_alias_set ());
	  dest = gen_reg_rtx (Pmode);

	  if (TARGET_GNU_TLS)
	    {
	      /* GNU TLS: offset is relative to the thread pointer.  */
	      emit_move_insn (dest, off);
	      return gen_rtx_PLUS (Pmode, base, dest);
	    }
	  else
	    /* Sun TLS: offset is subtracted from the thread pointer.  */
	    emit_insn (gen_subsi3 (dest, base, off));
	  break;

	case TLS_MODEL_LOCAL_EXEC:
	  base = get_thread_pointer ();

	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
				TARGET_GNU_TLS ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
	  off = gen_rtx_CONST (Pmode, off);

	  if (TARGET_GNU_TLS)
	    return gen_rtx_PLUS (Pmode, base, off);
	  else
	    {
	      dest = gen_reg_rtx (Pmode);
	      emit_insn (gen_subsi3 (dest, base, off));
	    }
	  break;

	default:
	  abort ();
	}

      return dest;
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  /* Fold whichever operand is the CONST_INT with the inner
	     constant; OTHER keeps the remaining (possibly symbolic) part.  */
	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (other, INTVAL (constant)));
	    }
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Force any remaining multiplies out into registers.  */
      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && GET_CODE (XEXP (x, 1)) == REG
	  && GET_CODE (XEXP (x, 0)) == REG)
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Last resort: force one operand of the PLUS into a register.  */
      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
2a2ab3f9
JVA
5909\f
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      /* "."  denotes the current location; only meaningful with PIC.  */
      if (flag_pic)
	putc ('.', file);
      else
	abort ();
      break;

    case SYMBOL_REF:
      assemble_name (file, XSTR (x, 0));
      if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      else
	abort ();
      break;

    case MINUS:
      /* Group the difference so the assembler relocates it as a unit.  */
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      /* PIC/TLS unspecs wrap a single operand and map to an @-suffix
	 relocation recognized by the assembler.  */
      if (XVECLEN (x, 0) != 1)
	abort ();
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs ("@GOTPCREL(%rip)", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs ("@GOTTPOFF", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@TPOFF", file);
	  break;
	case UNSPEC_NTPOFF:
	  fputs ("@NTPOFF", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@DTPOFF", file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  fputs ("@GOTNTPOFF", file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs ("@INDNTPOFF", file);
	  break;
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
1865dbb5 6045
0f290768 6046/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
1865dbb5
JM
6047 We need to handle our special PIC relocations. */
6048
0f290768 6049void
1865dbb5
JM
6050i386_dwarf_output_addr_const (file, x)
6051 FILE *file;
6052 rtx x;
6053{
14f73b5a 6054#ifdef ASM_QUAD
18b5b8d6 6055 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
14f73b5a
JH
6056#else
6057 if (TARGET_64BIT)
6058 abort ();
18b5b8d6 6059 fprintf (file, "%s", ASM_LONG);
14f73b5a 6060#endif
1865dbb5
JM
6061 if (flag_pic)
6062 output_pic_addr_const (file, x, '\0');
6063 else
6064 output_addr_const (file, x);
6065 fputc ('\n', file);
6066}
6067
b9203463
RH
6068/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6069 We need to emit DTP-relative relocations. */
6070
6071void
6072i386_output_dwarf_dtprel (file, size, x)
6073 FILE *file;
6074 int size;
6075 rtx x;
6076{
6077 switch (size)
6078 {
6079 case 4:
6080 fputs (ASM_LONG, file);
6081 break;
6082 case 8:
6083#ifdef ASM_QUAD
6084 fputs (ASM_QUAD, file);
6085 break;
6086#endif
6087 default:
6088 abort ();
6089 }
6090
6091 output_addr_const (file, x);
6092 fputs ("@DTPOFF", file);
6093}
6094
1865dbb5
JM
/* In the name of slightly smaller debug output, and to cater to
   general assembler losage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.  */

rtx
i386_simplify_dwarf_addr (orig_x)
     rtx orig_x;
{
  rtx x = orig_x, y;

  if (GET_CODE (x) == MEM)
    x = XEXP (x, 0);

  /* 64-bit only uses the RIP-relative GOTPCREL form.  */
  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST
	  || GET_CODE (XEXP (x, 0)) != UNSPEC
	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
	  || GET_CODE (orig_x) != MEM)
	return orig_x;
      return XVECEXP (XEXP (x, 0), 0, 0);
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return orig_x;

  /* Y collects the non-PIC part of the base (NULL when the base is
     just the PIC register itself).  */
  if (GET_CODE (XEXP (x, 0)) == REG
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
    /* %ebx + GOT/GOTOFF */
    y = NULL;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      y = XEXP (x, 0);
      if (GET_CODE (XEXP (y, 0)) == REG
	  && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 1);
      else if (GET_CODE (XEXP (y, 1)) == REG
	       && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 0);
      else
	return orig_x;
      if (GET_CODE (y) != REG
	  && GET_CODE (y) != MULT
	  && GET_CODE (y) != ASHIFT)
	return orig_x;
    }
  else
    return orig_x;

  /* Bare UNSPEC: GOT only valid behind a MEM, GOTOFF only outside one.  */
  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
    {
      if (y)
	return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
      return XVECEXP (x, 0, 0);
    }

  /* UNSPEC plus an integer offset: keep the offset in the result.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == UNSPEC
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
	      && GET_CODE (orig_x) != MEM)))
    {
      x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
      if (y)
	return gen_rtx_PLUS (Pmode, y, x);
      return x;
    }

  return orig_x;
}
2a2ab3f9 6171\f
/* Print to FILE the condition-code suffix (e.g. "e", "ge", "nbe") for
   comparison CODE in CC mode MODE.  If REVERSE, print the suffix for
   the reversed condition.  FP selects fcmov-style spellings where the
   integer and FP forms differ.  Aborts when CODE is not representable
   in MODE.  */

static void
put_condition_code (code, mode, reverse, fp, file)
     enum rtx_code code;
     enum machine_mode mode;
     int reverse, fp;
     FILE *file;
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      /* FP comparisons must already be reduced to a single check.  */
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	abort ();
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
	abort ();
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
	 Those same assemblers have the same but opposite losage on cmov.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "s";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "l";
      else
	abort ();
      break;
    case LTU:
      if (mode != CCmode)
	abort ();
      suffix = "b";
      break;
    case GE:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "ns";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "ge";
      else
	abort ();
      break;
    case GEU:
      /* ??? As above.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
	abort ();
      suffix = "le";
      break;
    case LEU:
      if (mode != CCmode)
	abort ();
      suffix = "be";
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      abort ();
    }
  fputs (suffix, file);
}
6261
/* Print the assembler name of register X to FILE.  CODE is the operand
   print code selecting the access size ('b' byte, 'w' word, 'k' dword,
   'q' qword, 'h' high byte, 'y' st(0) form); 0 means use the size of
   X's mode.  Aborts on registers that should never appear in output.  */

void
print_reg (x, code, file)
     rtx x;
     int code;
     FILE *file;
{
  if (REGNO (x) == ARG_POINTER_REGNUM
      || REGNO (x) == FRAME_POINTER_REGNUM
      || REGNO (x) == FLAGS_REG
      || REGNO (x) == FPSR_REG)
    abort ();

  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  /* Translate the print code into a byte-size selector (3 means the
     special st(0) form, 0 means a high-byte register).  */
  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use different naming convention
     from the normal registers.  */
  if (REX_INT_REG_P (x))
    {
      if (!TARGET_64BIT)
	abort ();
      switch (code)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 2:
	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 4:
	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 8:
	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
	{
	  fputs ("st(0)", file);
	  break;
	}
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      /* Wide integer registers get an 'e' (32-bit) or 'r' (64-bit)
	 prefix before the 16-bit base name.  */
      if (! ANY_FP_REG_P (x))
	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      abort ();
    }
}
6350
f996902d
RH
6351/* Locate some local-dynamic symbol still in use by this function
6352 so that we can print its name in some tls_local_dynamic_base
6353 pattern. */
6354
6355static const char *
6356get_some_local_dynamic_name ()
6357{
6358 rtx insn;
6359
6360 if (cfun->machine->some_ld_name)
6361 return cfun->machine->some_ld_name;
6362
6363 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6364 if (INSN_P (insn)
6365 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6366 return cfun->machine->some_ld_name;
6367
6368 abort ();
6369}
6370
6371static int
6372get_some_local_dynamic_name_1 (px, data)
6373 rtx *px;
6374 void *data ATTRIBUTE_UNUSED;
6375{
6376 rtx x = *px;
6377
6378 if (GET_CODE (x) == SYMBOL_REF
6379 && local_dynamic_symbolic_operand (x, Pmode))
6380 {
6381 cfun->machine->some_ld_name = XSTR (x, 0);
6382 return 1;
6383 }
6384
6385 return 0;
6386}
6387
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
	nothing
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assemblers argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
   + -- print a branch hint ("ds ; " / "cs ; ") when profitable.
   */

void
print_operand (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  if (code)
    {
      switch (code)
	{
	case '*':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('*', file);
	  return;

	case '&':
	  assemble_name (file, get_some_local_dynamic_name ());
	  return;

	case 'A':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('*', file);
	  else if (ASSEMBLER_DIALECT == ASM_INTEL)
	    {
	      /* Intel syntax.  For absolute addresses, registers should not
		 be surrounded by braces.  */
	      if (GET_CODE (x) != REG)
		{
		  putc ('[', file);
		  PRINT_OPERAND (file, x, 0);
		  putc (']', file);
		  return;
		}
	    }
	  else
	    abort ();

	  PRINT_OPERAND (file, x, 0);
	  return;


	case 'L':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'W':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('w', file);
	  return;

	case 'B':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('b', file);
	  return;

	case 'Q':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'S':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('s', file);
	  return;

	case 'T':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('t', file);
	  return;

	case 'z':
	  /* 387 opcodes don't get size suffixes if the operands are
	     registers.  */
	  if (STACK_REG_P (x))
	    return;

	  /* Likewise if using Intel opcodes.  */
	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    return;

	  /* This is the size of op from size of operand.  */
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 2:
#ifdef HAVE_GAS_FILDS_FISTS
	      putc ('s', file);
#endif
	      return;

	    case 4:
	      if (GET_MODE (x) == SFmode)
		{
		  putc ('s', file);
		  return;
		}
	      else
		putc ('l', file);
	      return;

	    case 12:
	    case 16:
	      putc ('t', file);
	      return;

	    case 8:
	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
		{
#ifdef GAS_MNEMONICS
		  putc ('q', file);
#else
		  /* Assemblers without 'q' mnemonics take "ll".  */
		  putc ('l', file);
		  putc ('l', file);
#endif
		}
	      else
		putc ('l', file);
	      return;

	    default:
	      abort ();
	    }

	/* Size-override codes are handled after the switch, when the
	   operand itself is printed.  */
	case 'b':
	case 'w':
	case 'k':
	case 'q':
	case 'h':
	case 'y':
	case 'X':
	case 'P':
	  break;

	case 's':
	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
	    {
	      PRINT_OPERAND (file, x, 0);
	      putc (',', file);
	    }
	  return;

	case 'D':
	  /* Little bit of braindamage here.  The SSE compare instructions
	     does use completely different names for the comparisons that the
	     fp conditional moves.  */
	  switch (GET_CODE (x))
	    {
	    case EQ:
	    case UNEQ:
	      fputs ("eq", file);
	      break;
	    case LT:
	    case UNLT:
	      fputs ("lt", file);
	      break;
	    case LE:
	    case UNLE:
	      fputs ("le", file);
	      break;
	    case UNORDERED:
	      fputs ("unord", file);
	      break;
	    case NE:
	    case LTGT:
	      fputs ("neq", file);
	      break;
	    case UNGE:
	    case GE:
	      fputs ("nlt", file);
	      break;
	    case UNGT:
	    case GT:
	      fputs ("nle", file);
	      break;
	    case ORDERED:
	      fputs ("ord", file);
	      break;
	    default:
	      abort ();
	      break;
	    }
	  return;
	case 'O':
#ifdef CMOV_SUN_AS_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    {
	      switch (GET_MODE (x))
		{
		case HImode: putc ('w', file); break;
		case SImode:
		case SFmode: putc ('l', file); break;
		case DImode:
		case DFmode: putc ('q', file); break;
		default: abort ();
		}
	      putc ('.', file);
	    }
#endif
	  return;
	case 'C':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
	  return;
	case 'F':
#ifdef CMOV_SUN_AS_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('.', file);
#endif
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
	  return;

	  /* Like above, but reverse condition */
	case 'c':
	  /* Check to see if argument to %c is really a constant
	     and not a condition code which needs to be reversed.  */
	  if (GET_RTX_CLASS (GET_CODE (x)) != '<')
	    {
	      output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
	      return;
	    }
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
	  return;
	case 'f':
#ifdef CMOV_SUN_AS_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('.', file);
#endif
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
	  return;
	case '+':
	  {
	    rtx x;

	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
	      return;

	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	    if (x)
	      {
		int pred_val = INTVAL (XEXP (x, 0));

		/* Only hint when the prediction is decisive (outside
		   the 45%-55% band).  */
		if (pred_val < REG_BR_PROB_BASE * 45 / 100
		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
		  {
		    int taken = pred_val > REG_BR_PROB_BASE / 2;
		    int cputaken = final_forward_branch_p (current_output_insn) == 0;

		    /* Emit hints only in the case default branch prediction
		       heruistics would fail.  */
		    if (taken != cputaken)
		      {
			/* We use 3e (DS) prefix for taken branches and
			   2e (CS) prefix for not taken branches.  */
			if (taken)
			  fputs ("ds ; ", file);
			else
			  fputs ("cs ; ", file);
		      }
		  }
	      }
	    return;
	  }
	default:
	  output_operand_lossage ("invalid operand code `%c'", code);
	}
    }

  if (GET_CODE (x) == REG)
    {
      PRINT_REG (x, code, file);
    }

  else if (GET_CODE (x) == MEM)
    {
      /* No `byte ptr' prefix for call instructions.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
	{
	  const char * size;
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 1: size = "BYTE"; break;
	    case 2: size = "WORD"; break;
	    case 4: size = "DWORD"; break;
	    case 8: size = "QWORD"; break;
	    case 12: size = "XWORD"; break;
	    case 16: size = "XMMWORD"; break;
	    default:
	      abort ();
	    }

	  /* Check for explicit size override (codes 'b', 'w' and 'k')  */
	  if (code == 'b')
	    size = "BYTE";
	  else if (code == 'w')
	    size = "WORD";
	  else if (code == 'k')
	    size = "DWORD";

	  fputs (size, file);
	  fputs (" PTR ", file);
	}

      x = XEXP (x, 0);
      if (flag_pic && CONSTANT_ADDRESS_P (x))
	output_pic_addr_const (file, x, code);
      /* Avoid (%rip) for call operands.  */
      else if (CONSTANT_ADDRESS_P (x) && code == 'P'
	       && GET_CODE (x) != CONST_INT)
	output_addr_const (file, x);
      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
	output_operand_lossage ("invalid constraints for operand");
      else
	output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      REAL_VALUE_TYPE r;
      long l;

      /* Emit single-precision immediates as their raw bit pattern.  */
      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('$', file);
      fprintf (file, "0x%lx", l);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fprintf (file, "%s", dstr);
    }

  else if (GET_CODE (x) == CONST_DOUBLE
	   && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fprintf (file, "%s", dstr);
    }

  else
    {
      /* Remaining cases are constants: prefix with '$' (ATT) or
	 "OFFSET FLAT:" (Intel, for addresses), unless code 'P'.  */
      if (code != 'P')
	{
	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	    }
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	      else
		fputs ("OFFSET FLAT:", file);
	    }
	}
      if (GET_CODE (x) == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic)
	output_pic_addr_const (file, x, code);
      else
	output_addr_const (file, x);
    }
}
6786\f
6787/* Print a memory operand whose address is ADDR. */
6788
6789void
6790print_operand_address (file, addr)
6791 FILE *file;
6792 register rtx addr;
6793{
e075ae69
RH
6794 struct ix86_address parts;
6795 rtx base, index, disp;
6796 int scale;
e9a25f70 6797
9e20be0c
JJ
6798 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
6799 {
6800 if (ASSEMBLER_DIALECT == ASM_INTEL)
6801 fputs ("DWORD PTR ", file);
6802 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6803 putc ('%', file);
6804 fputs ("gs:0", file);
6805 return;
6806 }
6807
e075ae69
RH
6808 if (! ix86_decompose_address (addr, &parts))
6809 abort ();
e9a25f70 6810
e075ae69
RH
6811 base = parts.base;
6812 index = parts.index;
6813 disp = parts.disp;
6814 scale = parts.scale;
e9a25f70 6815
e075ae69
RH
6816 if (!base && !index)
6817 {
6818 /* Displacement only requires special attention. */
e9a25f70 6819
e075ae69 6820 if (GET_CODE (disp) == CONST_INT)
2a2ab3f9 6821 {
80f33d06 6822 if (ASSEMBLER_DIALECT == ASM_INTEL)
fb204271
DN
6823 {
6824 if (USER_LABEL_PREFIX[0] == 0)
6825 putc ('%', file);
6826 fputs ("ds:", file);
6827 }
e075ae69 6828 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
2a2ab3f9 6829 }
e075ae69
RH
6830 else if (flag_pic)
6831 output_pic_addr_const (file, addr, 0);
6832 else
6833 output_addr_const (file, addr);
0d7d98ee
JH
6834
6835 /* Use one byte shorter RIP relative addressing for 64bit mode. */
edfe8595
RH
6836 if (TARGET_64BIT
6837 && (GET_CODE (addr) == SYMBOL_REF
6838 || GET_CODE (addr) == LABEL_REF
6839 || (GET_CODE (addr) == CONST
6840 && GET_CODE (XEXP (addr, 0)) == PLUS
200bcf7e
JH
6841 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
6842 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
edfe8595 6843 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
0d7d98ee 6844 fputs ("(%rip)", file);
e075ae69
RH
6845 }
6846 else
6847 {
80f33d06 6848 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9 6849 {
e075ae69 6850 if (disp)
2a2ab3f9 6851 {
c399861d 6852 if (flag_pic)
e075ae69
RH
6853 output_pic_addr_const (file, disp, 0);
6854 else if (GET_CODE (disp) == LABEL_REF)
6855 output_asm_label (disp);
2a2ab3f9 6856 else
e075ae69 6857 output_addr_const (file, disp);
2a2ab3f9
JVA
6858 }
6859
e075ae69
RH
6860 putc ('(', file);
6861 if (base)
6862 PRINT_REG (base, 0, file);
6863 if (index)
2a2ab3f9 6864 {
e075ae69
RH
6865 putc (',', file);
6866 PRINT_REG (index, 0, file);
6867 if (scale != 1)
6868 fprintf (file, ",%d", scale);
2a2ab3f9 6869 }
e075ae69 6870 putc (')', file);
2a2ab3f9 6871 }
2a2ab3f9
JVA
6872 else
6873 {
e075ae69 6874 rtx offset = NULL_RTX;
e9a25f70 6875
e075ae69
RH
6876 if (disp)
6877 {
6878 /* Pull out the offset of a symbol; print any symbol itself. */
6879 if (GET_CODE (disp) == CONST
6880 && GET_CODE (XEXP (disp, 0)) == PLUS
6881 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6882 {
6883 offset = XEXP (XEXP (disp, 0), 1);
6884 disp = gen_rtx_CONST (VOIDmode,
6885 XEXP (XEXP (disp, 0), 0));
6886 }
ce193852 6887
e075ae69
RH
6888 if (flag_pic)
6889 output_pic_addr_const (file, disp, 0);
6890 else if (GET_CODE (disp) == LABEL_REF)
6891 output_asm_label (disp);
6892 else if (GET_CODE (disp) == CONST_INT)
6893 offset = disp;
6894 else
6895 output_addr_const (file, disp);
6896 }
e9a25f70 6897
e075ae69
RH
6898 putc ('[', file);
6899 if (base)
a8620236 6900 {
e075ae69
RH
6901 PRINT_REG (base, 0, file);
6902 if (offset)
6903 {
6904 if (INTVAL (offset) >= 0)
6905 putc ('+', file);
6906 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6907 }
a8620236 6908 }
e075ae69
RH
6909 else if (offset)
6910 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 6911 else
e075ae69 6912 putc ('0', file);
e9a25f70 6913
e075ae69
RH
6914 if (index)
6915 {
6916 putc ('+', file);
6917 PRINT_REG (index, 0, file);
6918 if (scale != 1)
6919 fprintf (file, "*%d", scale);
6920 }
6921 putc (']', file);
6922 }
2a2ab3f9
JVA
6923 }
6924}
f996902d
RH
6925
6926bool
6927output_addr_const_extra (file, x)
6928 FILE *file;
6929 rtx x;
6930{
6931 rtx op;
6932
6933 if (GET_CODE (x) != UNSPEC)
6934 return false;
6935
6936 op = XVECEXP (x, 0, 0);
6937 switch (XINT (x, 1))
6938 {
6939 case UNSPEC_GOTTPOFF:
6940 output_addr_const (file, op);
dea73790 6941 /* FIXME: This might be @TPOFF in Sun ld. */
f996902d
RH
6942 fputs ("@GOTTPOFF", file);
6943 break;
6944 case UNSPEC_TPOFF:
6945 output_addr_const (file, op);
6946 fputs ("@TPOFF", file);
6947 break;
6948 case UNSPEC_NTPOFF:
6949 output_addr_const (file, op);
6950 fputs ("@NTPOFF", file);
6951 break;
6952 case UNSPEC_DTPOFF:
6953 output_addr_const (file, op);
6954 fputs ("@DTPOFF", file);
6955 break;
dea73790
JJ
6956 case UNSPEC_GOTNTPOFF:
6957 output_addr_const (file, op);
6958 fputs ("@GOTNTPOFF", file);
6959 break;
6960 case UNSPEC_INDNTPOFF:
6961 output_addr_const (file, op);
6962 fputs ("@INDNTPOFF", file);
6963 break;
f996902d
RH
6964
6965 default:
6966 return false;
6967 }
6968
6969 return true;
6970}
2a2ab3f9
JVA
6971\f
6972/* Split one or more DImode RTL references into pairs of SImode
6973 references. The RTL can be REG, offsettable MEM, integer constant, or
6974 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6975 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 6976 that parallel "operands". */
2a2ab3f9
JVA
6977
6978void
6979split_di (operands, num, lo_half, hi_half)
6980 rtx operands[];
6981 int num;
6982 rtx lo_half[], hi_half[];
6983{
6984 while (num--)
6985 {
57dbca5e 6986 rtx op = operands[num];
b932f770
JH
6987
6988 /* simplify_subreg refuse to split volatile memory addresses,
6989 but we still have to handle it. */
6990 if (GET_CODE (op) == MEM)
2a2ab3f9 6991 {
f4ef873c 6992 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 6993 hi_half[num] = adjust_address (op, SImode, 4);
2a2ab3f9
JVA
6994 }
6995 else
b932f770 6996 {
38ca929b
JH
6997 lo_half[num] = simplify_gen_subreg (SImode, op,
6998 GET_MODE (op) == VOIDmode
6999 ? DImode : GET_MODE (op), 0);
7000 hi_half[num] = simplify_gen_subreg (SImode, op,
7001 GET_MODE (op) == VOIDmode
7002 ? DImode : GET_MODE (op), 4);
b932f770 7003 }
2a2ab3f9
JVA
7004 }
7005}
44cf5b6a
JH
7006/* Split one or more TImode RTL references into pairs of SImode
7007 references. The RTL can be REG, offsettable MEM, integer constant, or
7008 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7009 split and "num" is its length. lo_half and hi_half are output arrays
7010 that parallel "operands". */
7011
7012void
7013split_ti (operands, num, lo_half, hi_half)
7014 rtx operands[];
7015 int num;
7016 rtx lo_half[], hi_half[];
7017{
7018 while (num--)
7019 {
7020 rtx op = operands[num];
b932f770
JH
7021
7022 /* simplify_subreg refuse to split volatile memory addresses, but we
7023 still have to handle it. */
7024 if (GET_CODE (op) == MEM)
44cf5b6a
JH
7025 {
7026 lo_half[num] = adjust_address (op, DImode, 0);
7027 hi_half[num] = adjust_address (op, DImode, 8);
7028 }
7029 else
b932f770
JH
7030 {
7031 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7032 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7033 }
44cf5b6a
JH
7034 }
7035}
2a2ab3f9 7036\f
2a2ab3f9
JVA
7037/* Output code to perform a 387 binary operation in INSN, one of PLUS,
7038 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7039 is the expression of the binary operation. The output may either be
7040 emitted here, or returned to the caller, like all output_* functions.
7041
7042 There is no guarantee that the operands are the same mode, as they
0f290768 7043 might be within FLOAT or FLOAT_EXTEND expressions. */
2a2ab3f9 7044
e3c2afab
AM
7045#ifndef SYSV386_COMPAT
7046/* Set to 1 for compatibility with brain-damaged assemblers. No-one
7047 wants to fix the assemblers because that causes incompatibility
7048 with gcc. No-one wants to fix gcc because that causes
7049 incompatibility with assemblers... You can use the option of
7050 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7051#define SYSV386_COMPAT 1
7052#endif
7053
69ddee61 7054const char *
2a2ab3f9
JVA
7055output_387_binary_op (insn, operands)
7056 rtx insn;
7057 rtx *operands;
7058{
e3c2afab 7059 static char buf[30];
69ddee61 7060 const char *p;
1deaa899
JH
7061 const char *ssep;
7062 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
2a2ab3f9 7063
e3c2afab
AM
7064#ifdef ENABLE_CHECKING
7065 /* Even if we do not want to check the inputs, this documents input
7066 constraints. Which helps in understanding the following code. */
7067 if (STACK_REG_P (operands[0])
7068 && ((REG_P (operands[1])
7069 && REGNO (operands[0]) == REGNO (operands[1])
7070 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7071 || (REG_P (operands[2])
7072 && REGNO (operands[0]) == REGNO (operands[2])
7073 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7074 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7075 ; /* ok */
1deaa899 7076 else if (!is_sse)
e3c2afab
AM
7077 abort ();
7078#endif
7079
2a2ab3f9
JVA
7080 switch (GET_CODE (operands[3]))
7081 {
7082 case PLUS:
e075ae69
RH
7083 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7084 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7085 p = "fiadd";
7086 else
7087 p = "fadd";
1deaa899 7088 ssep = "add";
2a2ab3f9
JVA
7089 break;
7090
7091 case MINUS:
e075ae69
RH
7092 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7093 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7094 p = "fisub";
7095 else
7096 p = "fsub";
1deaa899 7097 ssep = "sub";
2a2ab3f9
JVA
7098 break;
7099
7100 case MULT:
e075ae69
RH
7101 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7102 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7103 p = "fimul";
7104 else
7105 p = "fmul";
1deaa899 7106 ssep = "mul";
2a2ab3f9
JVA
7107 break;
7108
7109 case DIV:
e075ae69
RH
7110 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7111 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7112 p = "fidiv";
7113 else
7114 p = "fdiv";
1deaa899 7115 ssep = "div";
2a2ab3f9
JVA
7116 break;
7117
7118 default:
7119 abort ();
7120 }
7121
1deaa899
JH
7122 if (is_sse)
7123 {
7124 strcpy (buf, ssep);
7125 if (GET_MODE (operands[0]) == SFmode)
7126 strcat (buf, "ss\t{%2, %0|%0, %2}");
7127 else
7128 strcat (buf, "sd\t{%2, %0|%0, %2}");
7129 return buf;
7130 }
e075ae69 7131 strcpy (buf, p);
2a2ab3f9
JVA
7132
7133 switch (GET_CODE (operands[3]))
7134 {
7135 case MULT:
7136 case PLUS:
7137 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7138 {
e3c2afab 7139 rtx temp = operands[2];
2a2ab3f9
JVA
7140 operands[2] = operands[1];
7141 operands[1] = temp;
7142 }
7143
e3c2afab
AM
7144 /* know operands[0] == operands[1]. */
7145
2a2ab3f9 7146 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
7147 {
7148 p = "%z2\t%2";
7149 break;
7150 }
2a2ab3f9
JVA
7151
7152 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
7153 {
7154 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
7155 /* How is it that we are storing to a dead operand[2]?
7156 Well, presumably operands[1] is dead too. We can't
7157 store the result to st(0) as st(0) gets popped on this
7158 instruction. Instead store to operands[2] (which I
7159 think has to be st(1)). st(1) will be popped later.
7160 gcc <= 2.8.1 didn't have this check and generated
7161 assembly code that the Unixware assembler rejected. */
7162 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 7163 else
e3c2afab 7164 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
e075ae69 7165 break;
6b28fd63 7166 }
2a2ab3f9
JVA
7167
7168 if (STACK_TOP_P (operands[0]))
e3c2afab 7169 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 7170 else
e3c2afab 7171 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
e075ae69 7172 break;
2a2ab3f9
JVA
7173
7174 case MINUS:
7175 case DIV:
7176 if (GET_CODE (operands[1]) == MEM)
e075ae69
RH
7177 {
7178 p = "r%z1\t%1";
7179 break;
7180 }
2a2ab3f9
JVA
7181
7182 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
7183 {
7184 p = "%z2\t%2";
7185 break;
7186 }
2a2ab3f9 7187
2a2ab3f9 7188 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63 7189 {
e3c2afab
AM
7190#if SYSV386_COMPAT
7191 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7192 derived assemblers, confusingly reverse the direction of
7193 the operation for fsub{r} and fdiv{r} when the
7194 destination register is not st(0). The Intel assembler
7195 doesn't have this brain damage. Read !SYSV386_COMPAT to
7196 figure out what the hardware really does. */
7197 if (STACK_TOP_P (operands[0]))
7198 p = "{p\t%0, %2|rp\t%2, %0}";
7199 else
7200 p = "{rp\t%2, %0|p\t%0, %2}";
7201#else
6b28fd63 7202 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
7203 /* As above for fmul/fadd, we can't store to st(0). */
7204 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 7205 else
e3c2afab
AM
7206 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7207#endif
e075ae69 7208 break;
6b28fd63 7209 }
2a2ab3f9
JVA
7210
7211 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 7212 {
e3c2afab 7213#if SYSV386_COMPAT
6b28fd63 7214 if (STACK_TOP_P (operands[0]))
e3c2afab 7215 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 7216 else
e3c2afab
AM
7217 p = "{p\t%1, %0|rp\t%0, %1}";
7218#else
7219 if (STACK_TOP_P (operands[0]))
7220 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7221 else
7222 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7223#endif
e075ae69 7224 break;
6b28fd63 7225 }
2a2ab3f9
JVA
7226
7227 if (STACK_TOP_P (operands[0]))
7228 {
7229 if (STACK_TOP_P (operands[1]))
e3c2afab 7230 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 7231 else
e3c2afab 7232 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 7233 break;
2a2ab3f9
JVA
7234 }
7235 else if (STACK_TOP_P (operands[1]))
e3c2afab
AM
7236 {
7237#if SYSV386_COMPAT
7238 p = "{\t%1, %0|r\t%0, %1}";
7239#else
7240 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7241#endif
7242 }
2a2ab3f9 7243 else
e3c2afab
AM
7244 {
7245#if SYSV386_COMPAT
7246 p = "{r\t%2, %0|\t%0, %2}";
7247#else
7248 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7249#endif
7250 }
e075ae69 7251 break;
2a2ab3f9
JVA
7252
7253 default:
7254 abort ();
7255 }
e075ae69
RH
7256
7257 strcat (buf, p);
7258 return buf;
2a2ab3f9 7259}
e075ae69 7260
a4f31c00 7261/* Output code to initialize control word copies used by
7a2e09f4
JH
7262 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7263 is set to control word rounding downwards. */
7264void
7265emit_i387_cw_initialization (normal, round_down)
7266 rtx normal, round_down;
7267{
7268 rtx reg = gen_reg_rtx (HImode);
7269
7270 emit_insn (gen_x86_fnstcw_1 (normal));
7271 emit_move_insn (reg, normal);
7272 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7273 && !TARGET_64BIT)
7274 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7275 else
7276 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7277 emit_move_insn (round_down, reg);
7278}
7279
2a2ab3f9 7280/* Output code for INSN to convert a float to a signed int. OPERANDS
46d21d2c 7281 are the insn operands. The output may be [HSD]Imode and the input
e075ae69 7282 operand may be [SDX]Fmode. */
2a2ab3f9 7283
69ddee61 7284const char *
2a2ab3f9
JVA
7285output_fix_trunc (insn, operands)
7286 rtx insn;
7287 rtx *operands;
7288{
7289 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
e075ae69 7290 int dimode_p = GET_MODE (operands[0]) == DImode;
2a2ab3f9 7291
e075ae69
RH
7292 /* Jump through a hoop or two for DImode, since the hardware has no
7293 non-popping instruction. We used to do this a different way, but
7294 that was somewhat fragile and broke with post-reload splitters. */
a05924f9
JH
7295 if (dimode_p && !stack_top_dies)
7296 output_asm_insn ("fld\t%y1", operands);
e075ae69 7297
7a2e09f4 7298 if (!STACK_TOP_P (operands[1]))
10195bd8
JW
7299 abort ();
7300
e075ae69 7301 if (GET_CODE (operands[0]) != MEM)
7a2e09f4 7302 abort ();
e9a25f70 7303
7a2e09f4 7304 output_asm_insn ("fldcw\t%3", operands);
e075ae69 7305 if (stack_top_dies || dimode_p)
7a2e09f4 7306 output_asm_insn ("fistp%z0\t%0", operands);
10195bd8 7307 else
7a2e09f4 7308 output_asm_insn ("fist%z0\t%0", operands);
e075ae69 7309 output_asm_insn ("fldcw\t%2", operands);
10195bd8 7310
e075ae69 7311 return "";
2a2ab3f9 7312}
cda749b1 7313
e075ae69
RH
7314/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7315 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7316 when fucom should be used. */
7317
69ddee61 7318const char *
e075ae69 7319output_fp_compare (insn, operands, eflags_p, unordered_p)
cda749b1
JW
7320 rtx insn;
7321 rtx *operands;
e075ae69 7322 int eflags_p, unordered_p;
cda749b1 7323{
e075ae69
RH
7324 int stack_top_dies;
7325 rtx cmp_op0 = operands[0];
7326 rtx cmp_op1 = operands[1];
0644b628 7327 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
e075ae69
RH
7328
7329 if (eflags_p == 2)
7330 {
7331 cmp_op0 = cmp_op1;
7332 cmp_op1 = operands[2];
7333 }
0644b628
JH
7334 if (is_sse)
7335 {
7336 if (GET_MODE (operands[0]) == SFmode)
7337 if (unordered_p)
7338 return "ucomiss\t{%1, %0|%0, %1}";
7339 else
7340 return "comiss\t{%1, %0|%0, %y}";
7341 else
7342 if (unordered_p)
7343 return "ucomisd\t{%1, %0|%0, %1}";
7344 else
7345 return "comisd\t{%1, %0|%0, %y}";
7346 }
cda749b1 7347
e075ae69 7348 if (! STACK_TOP_P (cmp_op0))
cda749b1
JW
7349 abort ();
7350
e075ae69 7351 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 7352
e075ae69
RH
7353 if (STACK_REG_P (cmp_op1)
7354 && stack_top_dies
7355 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7356 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 7357 {
e075ae69
RH
7358 /* If both the top of the 387 stack dies, and the other operand
7359 is also a stack register that dies, then this must be a
7360 `fcompp' float compare */
7361
7362 if (eflags_p == 1)
7363 {
7364 /* There is no double popping fcomi variant. Fortunately,
7365 eflags is immune from the fstp's cc clobbering. */
7366 if (unordered_p)
7367 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7368 else
7369 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7370 return "fstp\t%y0";
7371 }
7372 else
cda749b1 7373 {
e075ae69
RH
7374 if (eflags_p == 2)
7375 {
7376 if (unordered_p)
7377 return "fucompp\n\tfnstsw\t%0";
7378 else
7379 return "fcompp\n\tfnstsw\t%0";
7380 }
cda749b1
JW
7381 else
7382 {
e075ae69
RH
7383 if (unordered_p)
7384 return "fucompp";
7385 else
7386 return "fcompp";
cda749b1
JW
7387 }
7388 }
cda749b1
JW
7389 }
7390 else
7391 {
e075ae69 7392 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 7393
0f290768 7394 static const char * const alt[24] =
e075ae69
RH
7395 {
7396 "fcom%z1\t%y1",
7397 "fcomp%z1\t%y1",
7398 "fucom%z1\t%y1",
7399 "fucomp%z1\t%y1",
0f290768 7400
e075ae69
RH
7401 "ficom%z1\t%y1",
7402 "ficomp%z1\t%y1",
7403 NULL,
7404 NULL,
7405
7406 "fcomi\t{%y1, %0|%0, %y1}",
7407 "fcomip\t{%y1, %0|%0, %y1}",
7408 "fucomi\t{%y1, %0|%0, %y1}",
7409 "fucomip\t{%y1, %0|%0, %y1}",
7410
7411 NULL,
7412 NULL,
7413 NULL,
7414 NULL,
7415
7416 "fcom%z2\t%y2\n\tfnstsw\t%0",
7417 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7418 "fucom%z2\t%y2\n\tfnstsw\t%0",
7419 "fucomp%z2\t%y2\n\tfnstsw\t%0",
0f290768 7420
e075ae69
RH
7421 "ficom%z2\t%y2\n\tfnstsw\t%0",
7422 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7423 NULL,
7424 NULL
7425 };
7426
7427 int mask;
69ddee61 7428 const char *ret;
e075ae69
RH
7429
7430 mask = eflags_p << 3;
7431 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7432 mask |= unordered_p << 1;
7433 mask |= stack_top_dies;
7434
7435 if (mask >= 24)
7436 abort ();
7437 ret = alt[mask];
7438 if (ret == NULL)
7439 abort ();
cda749b1 7440
e075ae69 7441 return ret;
cda749b1
JW
7442 }
7443}
2a2ab3f9 7444
f88c65f7
RH
7445void
7446ix86_output_addr_vec_elt (file, value)
7447 FILE *file;
7448 int value;
7449{
7450 const char *directive = ASM_LONG;
7451
7452 if (TARGET_64BIT)
7453 {
7454#ifdef ASM_QUAD
7455 directive = ASM_QUAD;
7456#else
7457 abort ();
7458#endif
7459 }
7460
7461 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7462}
7463
7464void
7465ix86_output_addr_diff_elt (file, value, rel)
7466 FILE *file;
7467 int value, rel;
7468{
7469 if (TARGET_64BIT)
74411039 7470 fprintf (file, "%s%s%d-%s%d\n",
f88c65f7
RH
7471 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7472 else if (HAVE_AS_GOTOFF_IN_DATA)
7473 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
b069de3b
SS
7474#if TARGET_MACHO
7475 else if (TARGET_MACHO)
7476 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7477 machopic_function_base_name () + 1);
7478#endif
f88c65f7 7479 else
5fc0e5df
KW
7480 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7481 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
f88c65f7 7482}
32b5b1aa 7483\f
a8bac9ab
RH
7484/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7485 for the target. */
7486
7487void
7488ix86_expand_clear (dest)
7489 rtx dest;
7490{
7491 rtx tmp;
7492
7493 /* We play register width games, which are only valid after reload. */
7494 if (!reload_completed)
7495 abort ();
7496
7497 /* Avoid HImode and its attendant prefix byte. */
7498 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7499 dest = gen_rtx_REG (SImode, REGNO (dest));
7500
7501 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7502
7503 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7504 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7505 {
7506 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7507 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7508 }
7509
7510 emit_insn (tmp);
7511}
7512
f996902d
RH
7513/* X is an unchanging MEM. If it is a constant pool reference, return
7514 the constant pool rtx, else NULL. */
7515
7516static rtx
7517maybe_get_pool_constant (x)
7518 rtx x;
7519{
7520 x = XEXP (x, 0);
7521
7522 if (flag_pic)
7523 {
7524 if (GET_CODE (x) != PLUS)
7525 return NULL_RTX;
7526 if (XEXP (x, 0) != pic_offset_table_rtx)
7527 return NULL_RTX;
7528 x = XEXP (x, 1);
7529 if (GET_CODE (x) != CONST)
7530 return NULL_RTX;
7531 x = XEXP (x, 0);
7532 if (GET_CODE (x) != UNSPEC)
7533 return NULL_RTX;
7534 if (XINT (x, 1) != UNSPEC_GOTOFF)
7535 return NULL_RTX;
7536 x = XVECEXP (x, 0, 0);
7537 }
7538
7539 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7540 return get_pool_constant (x);
7541
7542 return NULL_RTX;
7543}
7544
79325812 7545void
e075ae69
RH
7546ix86_expand_move (mode, operands)
7547 enum machine_mode mode;
7548 rtx operands[];
32b5b1aa 7549{
e075ae69 7550 int strict = (reload_in_progress || reload_completed);
f996902d
RH
7551 rtx insn, op0, op1, tmp;
7552
7553 op0 = operands[0];
7554 op1 = operands[1];
7555
7556 /* ??? We have a slight problem. We need to say that tls symbols are
7557 not legitimate constants so that reload does not helpfully reload
7558 these constants from a REG_EQUIV, which we cannot handle. (Recall
7559 that general- and local-dynamic address resolution requires a
7560 function call.)
e9a25f70 7561
f996902d
RH
7562 However, if we say that tls symbols are not legitimate constants,
7563 then emit_move_insn helpfully drop them into the constant pool.
7564
7565 It is far easier to work around emit_move_insn than reload. Recognize
7566 the MEM that we would have created and extract the symbol_ref. */
7567
7568 if (mode == Pmode
7569 && GET_CODE (op1) == MEM
7570 && RTX_UNCHANGING_P (op1))
32b5b1aa 7571 {
f996902d
RH
7572 tmp = maybe_get_pool_constant (op1);
7573 /* Note that we only care about symbolic constants here, which
7574 unlike CONST_INT will always have a proper mode. */
7575 if (tmp && GET_MODE (tmp) == Pmode)
7576 op1 = tmp;
7577 }
e9a25f70 7578
f996902d
RH
7579 if (tls_symbolic_operand (op1, Pmode))
7580 {
7581 op1 = legitimize_address (op1, op1, VOIDmode);
7582 if (GET_CODE (op0) == MEM)
7583 {
7584 tmp = gen_reg_rtx (mode);
7585 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7586 op1 = tmp;
7587 }
7588 }
7589 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7590 {
b069de3b
SS
7591#if TARGET_MACHO
7592 if (MACHOPIC_PURE)
7593 {
7594 rtx temp = ((reload_in_progress
7595 || ((op0 && GET_CODE (op0) == REG)
7596 && mode == Pmode))
7597 ? op0 : gen_reg_rtx (Pmode));
7598 op1 = machopic_indirect_data_reference (op1, temp);
7599 op1 = machopic_legitimize_pic_address (op1, mode,
7600 temp == op1 ? 0 : temp);
7601 }
7602 else
7603 {
7604 if (MACHOPIC_INDIRECT)
7605 op1 = machopic_indirect_data_reference (op1, 0);
7606 }
7607 if (op0 != op1)
7608 {
7609 insn = gen_rtx_SET (VOIDmode, op0, op1);
7610 emit_insn (insn);
7611 }
7612 return;
7613#endif /* TARGET_MACHO */
f996902d
RH
7614 if (GET_CODE (op0) == MEM)
7615 op1 = force_reg (Pmode, op1);
e075ae69 7616 else
32b5b1aa 7617 {
f996902d 7618 rtx temp = op0;
e075ae69
RH
7619 if (GET_CODE (temp) != REG)
7620 temp = gen_reg_rtx (Pmode);
f996902d
RH
7621 temp = legitimize_pic_address (op1, temp);
7622 if (temp == op0)
e075ae69 7623 return;
f996902d 7624 op1 = temp;
32b5b1aa 7625 }
e075ae69
RH
7626 }
7627 else
7628 {
f996902d 7629 if (GET_CODE (op0) == MEM
44cf5b6a 7630 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
f996902d
RH
7631 || !push_operand (op0, mode))
7632 && GET_CODE (op1) == MEM)
7633 op1 = force_reg (mode, op1);
e9a25f70 7634
f996902d
RH
7635 if (push_operand (op0, mode)
7636 && ! general_no_elim_operand (op1, mode))
7637 op1 = copy_to_mode_reg (mode, op1);
2c5a510c 7638
44cf5b6a
JH
7639 /* Force large constants in 64bit compilation into register
7640 to get them CSEed. */
7641 if (TARGET_64BIT && mode == DImode
f996902d
RH
7642 && immediate_operand (op1, mode)
7643 && !x86_64_zero_extended_value (op1)
7644 && !register_operand (op0, mode)
44cf5b6a 7645 && optimize && !reload_completed && !reload_in_progress)
f996902d 7646 op1 = copy_to_mode_reg (mode, op1);
44cf5b6a 7647
e075ae69 7648 if (FLOAT_MODE_P (mode))
32b5b1aa 7649 {
d7a29404
JH
7650 /* If we are loading a floating point constant to a register,
7651 force the value to memory now, since we'll get better code
7652 out the back end. */
e075ae69
RH
7653
7654 if (strict)
7655 ;
f996902d
RH
7656 else if (GET_CODE (op1) == CONST_DOUBLE
7657 && register_operand (op0, mode))
7658 op1 = validize_mem (force_const_mem (mode, op1));
32b5b1aa 7659 }
32b5b1aa 7660 }
e9a25f70 7661
f996902d 7662 insn = gen_rtx_SET (VOIDmode, op0, op1);
e9a25f70 7663
e075ae69
RH
7664 emit_insn (insn);
7665}
e9a25f70 7666
e37af218
RH
7667void
7668ix86_expand_vector_move (mode, operands)
7669 enum machine_mode mode;
7670 rtx operands[];
7671{
7672 /* Force constants other than zero into memory. We do not know how
7673 the instructions used to build constants modify the upper 64 bits
7674 of the register, once we have that information we may be able
7675 to handle some of them more efficiently. */
7676 if ((reload_in_progress | reload_completed) == 0
7677 && register_operand (operands[0], mode)
7678 && CONSTANT_P (operands[1]))
7679 {
7680 rtx addr = gen_reg_rtx (Pmode);
7681 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
7682 operands[1] = gen_rtx_MEM (mode, addr);
7683 }
7684
7685 /* Make operand1 a register if it isn't already. */
7686 if ((reload_in_progress | reload_completed) == 0
7687 && !register_operand (operands[0], mode)
b105d6da 7688 && !register_operand (operands[1], mode))
e37af218 7689 {
59bef189 7690 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
e37af218
RH
7691 emit_move_insn (operands[0], temp);
7692 return;
7693 }
7694
7695 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
fce5a9f2 7696}
e37af218 7697
e075ae69
RH
7698/* Attempt to expand a binary operator. Make the expansion closer to the
7699 actual machine, then just general_operand, which will allow 3 separate
9d81fc27 7700 memory references (one output, two input) in a single insn. */
e9a25f70 7701
e075ae69
RH
7702void
7703ix86_expand_binary_operator (code, mode, operands)
7704 enum rtx_code code;
7705 enum machine_mode mode;
7706 rtx operands[];
7707{
7708 int matching_memory;
7709 rtx src1, src2, dst, op, clob;
7710
7711 dst = operands[0];
7712 src1 = operands[1];
7713 src2 = operands[2];
7714
7715 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7716 if (GET_RTX_CLASS (code) == 'c'
7717 && (rtx_equal_p (dst, src2)
7718 || immediate_operand (src1, mode)))
7719 {
7720 rtx temp = src1;
7721 src1 = src2;
7722 src2 = temp;
32b5b1aa 7723 }
e9a25f70 7724
e075ae69
RH
7725 /* If the destination is memory, and we do not have matching source
7726 operands, do things in registers. */
7727 matching_memory = 0;
7728 if (GET_CODE (dst) == MEM)
32b5b1aa 7729 {
e075ae69
RH
7730 if (rtx_equal_p (dst, src1))
7731 matching_memory = 1;
7732 else if (GET_RTX_CLASS (code) == 'c'
7733 && rtx_equal_p (dst, src2))
7734 matching_memory = 2;
7735 else
7736 dst = gen_reg_rtx (mode);
7737 }
0f290768 7738
e075ae69
RH
7739 /* Both source operands cannot be in memory. */
7740 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7741 {
7742 if (matching_memory != 2)
7743 src2 = force_reg (mode, src2);
7744 else
7745 src1 = force_reg (mode, src1);
32b5b1aa 7746 }
e9a25f70 7747
06a964de
JH
7748 /* If the operation is not commutable, source 1 cannot be a constant
7749 or non-matching memory. */
0f290768 7750 if ((CONSTANT_P (src1)
06a964de
JH
7751 || (!matching_memory && GET_CODE (src1) == MEM))
7752 && GET_RTX_CLASS (code) != 'c')
e075ae69 7753 src1 = force_reg (mode, src1);
0f290768 7754
e075ae69 7755 /* If optimizing, copy to regs to improve CSE */
fe577e58 7756 if (optimize && ! no_new_pseudos)
32b5b1aa 7757 {
e075ae69
RH
7758 if (GET_CODE (dst) == MEM)
7759 dst = gen_reg_rtx (mode);
7760 if (GET_CODE (src1) == MEM)
7761 src1 = force_reg (mode, src1);
7762 if (GET_CODE (src2) == MEM)
7763 src2 = force_reg (mode, src2);
32b5b1aa 7764 }
e9a25f70 7765
e075ae69
RH
7766 /* Emit the instruction. */
7767
7768 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7769 if (reload_in_progress)
7770 {
7771 /* Reload doesn't know about the flags register, and doesn't know that
7772 it doesn't want to clobber it. We can only do this with PLUS. */
7773 if (code != PLUS)
7774 abort ();
7775 emit_insn (op);
7776 }
7777 else
32b5b1aa 7778 {
e075ae69
RH
7779 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7780 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
32b5b1aa 7781 }
e9a25f70 7782
e075ae69
RH
7783 /* Fix up the destination if needed. */
7784 if (dst != operands[0])
7785 emit_move_insn (operands[0], dst);
7786}
7787
7788/* Return TRUE or FALSE depending on whether the binary operator meets the
7789 appropriate constraints. */
7790
7791int
7792ix86_binary_operator_ok (code, mode, operands)
7793 enum rtx_code code;
7794 enum machine_mode mode ATTRIBUTE_UNUSED;
7795 rtx operands[3];
7796{
7797 /* Both source operands cannot be in memory. */
7798 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7799 return 0;
7800 /* If the operation is not commutable, source 1 cannot be a constant. */
7801 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
7802 return 0;
7803 /* If the destination is memory, we must have a matching source operand. */
7804 if (GET_CODE (operands[0]) == MEM
7805 && ! (rtx_equal_p (operands[0], operands[1])
7806 || (GET_RTX_CLASS (code) == 'c'
7807 && rtx_equal_p (operands[0], operands[2]))))
7808 return 0;
06a964de 7809 /* If the operation is not commutable and the source 1 is memory, we must
d6a7951f 7810 have a matching destination. */
06a964de
JH
7811 if (GET_CODE (operands[1]) == MEM
7812 && GET_RTX_CLASS (code) != 'c'
7813 && ! rtx_equal_p (operands[0], operands[1]))
7814 return 0;
e075ae69
RH
7815 return 1;
7816}
7817
7818/* Attempt to expand a unary operator. Make the expansion closer to the
7819 actual machine, then just general_operand, which will allow 2 separate
9d81fc27 7820 memory references (one output, one input) in a single insn. */
e075ae69 7821
9d81fc27 7822void
e075ae69
RH
7823ix86_expand_unary_operator (code, mode, operands)
7824 enum rtx_code code;
7825 enum machine_mode mode;
7826 rtx operands[];
7827{
06a964de
JH
7828 int matching_memory;
7829 rtx src, dst, op, clob;
7830
7831 dst = operands[0];
7832 src = operands[1];
e075ae69 7833
06a964de
JH
7834 /* If the destination is memory, and we do not have matching source
7835 operands, do things in registers. */
7836 matching_memory = 0;
7837 if (GET_CODE (dst) == MEM)
32b5b1aa 7838 {
06a964de
JH
7839 if (rtx_equal_p (dst, src))
7840 matching_memory = 1;
e075ae69 7841 else
06a964de 7842 dst = gen_reg_rtx (mode);
32b5b1aa 7843 }
e9a25f70 7844
06a964de
JH
7845 /* When source operand is memory, destination must match. */
7846 if (!matching_memory && GET_CODE (src) == MEM)
7847 src = force_reg (mode, src);
0f290768 7848
06a964de 7849 /* If optimizing, copy to regs to improve CSE */
fe577e58 7850 if (optimize && ! no_new_pseudos)
06a964de
JH
7851 {
7852 if (GET_CODE (dst) == MEM)
7853 dst = gen_reg_rtx (mode);
7854 if (GET_CODE (src) == MEM)
7855 src = force_reg (mode, src);
7856 }
7857
7858 /* Emit the instruction. */
7859
7860 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7861 if (reload_in_progress || code == NOT)
7862 {
7863 /* Reload doesn't know about the flags register, and doesn't know that
7864 it doesn't want to clobber it. */
7865 if (code != NOT)
7866 abort ();
7867 emit_insn (op);
7868 }
7869 else
7870 {
7871 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7872 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7873 }
7874
7875 /* Fix up the destination if needed. */
7876 if (dst != operands[0])
7877 emit_move_insn (operands[0], dst);
e075ae69
RH
7878}
7879
7880/* Return TRUE or FALSE depending on whether the unary operator meets the
7881 appropriate constraints. */
7882
7883int
7884ix86_unary_operator_ok (code, mode, operands)
7885 enum rtx_code code ATTRIBUTE_UNUSED;
7886 enum machine_mode mode ATTRIBUTE_UNUSED;
7887 rtx operands[2] ATTRIBUTE_UNUSED;
7888{
06a964de
JH
7889 /* If one of operands is memory, source and destination must match. */
7890 if ((GET_CODE (operands[0]) == MEM
7891 || GET_CODE (operands[1]) == MEM)
7892 && ! rtx_equal_p (operands[0], operands[1]))
7893 return FALSE;
e075ae69
RH
7894 return TRUE;
7895}
7896
16189740
RH
7897/* Return TRUE or FALSE depending on whether the first SET in INSN
7898 has source and destination with matching CC modes, and that the
7899 CC mode is at least as constrained as REQ_MODE. */
7900
7901int
7902ix86_match_ccmode (insn, req_mode)
7903 rtx insn;
7904 enum machine_mode req_mode;
7905{
7906 rtx set;
7907 enum machine_mode set_mode;
7908
7909 set = PATTERN (insn);
7910 if (GET_CODE (set) == PARALLEL)
7911 set = XVECEXP (set, 0, 0);
7912 if (GET_CODE (set) != SET)
7913 abort ();
9076b9c1
JH
7914 if (GET_CODE (SET_SRC (set)) != COMPARE)
7915 abort ();
16189740
RH
7916
7917 set_mode = GET_MODE (SET_DEST (set));
7918 switch (set_mode)
7919 {
9076b9c1
JH
7920 case CCNOmode:
7921 if (req_mode != CCNOmode
7922 && (req_mode != CCmode
7923 || XEXP (SET_SRC (set), 1) != const0_rtx))
7924 return 0;
7925 break;
16189740 7926 case CCmode:
9076b9c1 7927 if (req_mode == CCGCmode)
16189740
RH
7928 return 0;
7929 /* FALLTHRU */
9076b9c1
JH
7930 case CCGCmode:
7931 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7932 return 0;
7933 /* FALLTHRU */
7934 case CCGOCmode:
16189740
RH
7935 if (req_mode == CCZmode)
7936 return 0;
7937 /* FALLTHRU */
7938 case CCZmode:
7939 break;
7940
7941 default:
7942 abort ();
7943 }
7944
7945 return (GET_MODE (SET_SRC (set)) == set_mode);
7946}
7947
e075ae69
RH
7948/* Generate insn patterns to do an integer compare of OPERANDS. */
7949
7950static rtx
7951ix86_expand_int_compare (code, op0, op1)
7952 enum rtx_code code;
7953 rtx op0, op1;
7954{
7955 enum machine_mode cmpmode;
7956 rtx tmp, flags;
7957
7958 cmpmode = SELECT_CC_MODE (code, op0, op1);
7959 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7960
7961 /* This is very simple, but making the interface the same as in the
7962 FP case makes the rest of the code easier. */
7963 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7964 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7965
7966 /* Return the test that should be put into the flags user, i.e.
7967 the bcc, scc, or cmov instruction. */
7968 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7969}
7970
3a3677ff
RH
7971/* Figure out whether to use ordered or unordered fp comparisons.
7972 Return the appropriate mode to use. */
e075ae69 7973
b1cdafbb 7974enum machine_mode
3a3677ff 7975ix86_fp_compare_mode (code)
8752c357 7976 enum rtx_code code ATTRIBUTE_UNUSED;
e075ae69 7977{
9e7adcb3
JH
7978 /* ??? In order to make all comparisons reversible, we do all comparisons
7979 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7980 all forms trapping and nontrapping comparisons, we can make inequality
7981 comparisons trapping again, since it results in better code when using
7982 FCOM based compares. */
7983 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
7984}
7985
9076b9c1
JH
7986enum machine_mode
7987ix86_cc_mode (code, op0, op1)
7988 enum rtx_code code;
7989 rtx op0, op1;
7990{
7991 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7992 return ix86_fp_compare_mode (code);
7993 switch (code)
7994 {
7995 /* Only zero flag is needed. */
7996 case EQ: /* ZF=0 */
7997 case NE: /* ZF!=0 */
7998 return CCZmode;
7999 /* Codes needing carry flag. */
265dab10
JH
8000 case GEU: /* CF=0 */
8001 case GTU: /* CF=0 & ZF=0 */
7e08e190
JH
8002 case LTU: /* CF=1 */
8003 case LEU: /* CF=1 | ZF=1 */
265dab10 8004 return CCmode;
9076b9c1
JH
8005 /* Codes possibly doable only with sign flag when
8006 comparing against zero. */
8007 case GE: /* SF=OF or SF=0 */
7e08e190 8008 case LT: /* SF<>OF or SF=1 */
9076b9c1
JH
8009 if (op1 == const0_rtx)
8010 return CCGOCmode;
8011 else
8012 /* For other cases Carry flag is not required. */
8013 return CCGCmode;
8014 /* Codes doable only with sign flag when comparing
8015 against zero, but we miss jump instruction for it
8016 so we need to use relational tests agains overflow
8017 that thus needs to be zero. */
8018 case GT: /* ZF=0 & SF=OF */
8019 case LE: /* ZF=1 | SF<>OF */
8020 if (op1 == const0_rtx)
8021 return CCNOmode;
8022 else
8023 return CCGCmode;
7fcd7218
JH
8024 /* strcmp pattern do (use flags) and combine may ask us for proper
8025 mode. */
8026 case USE:
8027 return CCmode;
9076b9c1 8028 default:
0f290768 8029 abort ();
9076b9c1
JH
8030 }
8031}
8032
3a3677ff
RH
8033/* Return true if we should use an FCOMI instruction for this fp comparison. */
8034
a940d8bd 8035int
3a3677ff 8036ix86_use_fcomi_compare (code)
9e7adcb3 8037 enum rtx_code code ATTRIBUTE_UNUSED;
3a3677ff 8038{
9e7adcb3
JH
8039 enum rtx_code swapped_code = swap_condition (code);
8040 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8041 || (ix86_fp_comparison_cost (swapped_code)
8042 == ix86_fp_comparison_fcomi_cost (swapped_code)));
3a3677ff
RH
8043}
8044
0f290768 8045/* Swap, force into registers, or otherwise massage the two operands
3a3677ff
RH
8046 to a fp comparison. The operands are updated in place; the new
8047 comparsion code is returned. */
8048
8049static enum rtx_code
8050ix86_prepare_fp_compare_args (code, pop0, pop1)
8051 enum rtx_code code;
8052 rtx *pop0, *pop1;
8053{
8054 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8055 rtx op0 = *pop0, op1 = *pop1;
8056 enum machine_mode op_mode = GET_MODE (op0);
0644b628 8057 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
3a3677ff 8058
e075ae69 8059 /* All of the unordered compare instructions only work on registers.
3a3677ff
RH
8060 The same is true of the XFmode compare instructions. The same is
8061 true of the fcomi compare instructions. */
8062
0644b628
JH
8063 if (!is_sse
8064 && (fpcmp_mode == CCFPUmode
8065 || op_mode == XFmode
8066 || op_mode == TFmode
8067 || ix86_use_fcomi_compare (code)))
e075ae69 8068 {
3a3677ff
RH
8069 op0 = force_reg (op_mode, op0);
8070 op1 = force_reg (op_mode, op1);
e075ae69
RH
8071 }
8072 else
8073 {
8074 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8075 things around if they appear profitable, otherwise force op0
8076 into a register. */
8077
8078 if (standard_80387_constant_p (op0) == 0
8079 || (GET_CODE (op0) == MEM
8080 && ! (standard_80387_constant_p (op1) == 0
8081 || GET_CODE (op1) == MEM)))
32b5b1aa 8082 {
e075ae69
RH
8083 rtx tmp;
8084 tmp = op0, op0 = op1, op1 = tmp;
8085 code = swap_condition (code);
8086 }
8087
8088 if (GET_CODE (op0) != REG)
3a3677ff 8089 op0 = force_reg (op_mode, op0);
e075ae69
RH
8090
8091 if (CONSTANT_P (op1))
8092 {
8093 if (standard_80387_constant_p (op1))
3a3677ff 8094 op1 = force_reg (op_mode, op1);
e075ae69 8095 else
3a3677ff 8096 op1 = validize_mem (force_const_mem (op_mode, op1));
32b5b1aa
SC
8097 }
8098 }
e9a25f70 8099
9e7adcb3
JH
8100 /* Try to rearrange the comparison to make it cheaper. */
8101 if (ix86_fp_comparison_cost (code)
8102 > ix86_fp_comparison_cost (swap_condition (code))
558740bf 8103 && (GET_CODE (op1) == REG || !no_new_pseudos))
9e7adcb3
JH
8104 {
8105 rtx tmp;
8106 tmp = op0, op0 = op1, op1 = tmp;
8107 code = swap_condition (code);
8108 if (GET_CODE (op0) != REG)
8109 op0 = force_reg (op_mode, op0);
8110 }
8111
3a3677ff
RH
8112 *pop0 = op0;
8113 *pop1 = op1;
8114 return code;
8115}
8116
c0c102a9
JH
8117/* Convert comparison codes we use to represent FP comparison to integer
8118 code that will result in proper branch. Return UNKNOWN if no such code
8119 is available. */
8120static enum rtx_code
8121ix86_fp_compare_code_to_integer (code)
8122 enum rtx_code code;
8123{
8124 switch (code)
8125 {
8126 case GT:
8127 return GTU;
8128 case GE:
8129 return GEU;
8130 case ORDERED:
8131 case UNORDERED:
8132 return code;
8133 break;
8134 case UNEQ:
8135 return EQ;
8136 break;
8137 case UNLT:
8138 return LTU;
8139 break;
8140 case UNLE:
8141 return LEU;
8142 break;
8143 case LTGT:
8144 return NE;
8145 break;
8146 default:
8147 return UNKNOWN;
8148 }
8149}
8150
8151/* Split comparison code CODE into comparisons we can do using branch
8152 instructions. BYPASS_CODE is comparison code for branch that will
8153 branch around FIRST_CODE and SECOND_CODE. If some of branches
8154 is not required, set value to NIL.
8155 We never require more than two branches. */
8156static void
8157ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8158 enum rtx_code code, *bypass_code, *first_code, *second_code;
8159{
8160 *first_code = code;
8161 *bypass_code = NIL;
8162 *second_code = NIL;
8163
8164 /* The fcomi comparison sets flags as follows:
8165
8166 cmp ZF PF CF
8167 > 0 0 0
8168 < 0 0 1
8169 = 1 0 0
8170 un 1 1 1 */
8171
8172 switch (code)
8173 {
8174 case GT: /* GTU - CF=0 & ZF=0 */
8175 case GE: /* GEU - CF=0 */
8176 case ORDERED: /* PF=0 */
8177 case UNORDERED: /* PF=1 */
8178 case UNEQ: /* EQ - ZF=1 */
8179 case UNLT: /* LTU - CF=1 */
8180 case UNLE: /* LEU - CF=1 | ZF=1 */
8181 case LTGT: /* EQ - ZF=0 */
8182 break;
8183 case LT: /* LTU - CF=1 - fails on unordered */
8184 *first_code = UNLT;
8185 *bypass_code = UNORDERED;
8186 break;
8187 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8188 *first_code = UNLE;
8189 *bypass_code = UNORDERED;
8190 break;
8191 case EQ: /* EQ - ZF=1 - fails on unordered */
8192 *first_code = UNEQ;
8193 *bypass_code = UNORDERED;
8194 break;
8195 case NE: /* NE - ZF=0 - fails on unordered */
8196 *first_code = LTGT;
8197 *second_code = UNORDERED;
8198 break;
8199 case UNGE: /* GEU - CF=0 - fails on unordered */
8200 *first_code = GE;
8201 *second_code = UNORDERED;
8202 break;
8203 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8204 *first_code = GT;
8205 *second_code = UNORDERED;
8206 break;
8207 default:
8208 abort ();
8209 }
8210 if (!TARGET_IEEE_FP)
8211 {
8212 *second_code = NIL;
8213 *bypass_code = NIL;
8214 }
8215}
8216
9e7adcb3
JH
8217/* Return cost of comparison done fcom + arithmetics operations on AX.
8218 All following functions do use number of instructions as an cost metrics.
8219 In future this should be tweaked to compute bytes for optimize_size and
8220 take into account performance of various instructions on various CPUs. */
8221static int
8222ix86_fp_comparison_arithmetics_cost (code)
8223 enum rtx_code code;
8224{
8225 if (!TARGET_IEEE_FP)
8226 return 4;
8227 /* The cost of code output by ix86_expand_fp_compare. */
8228 switch (code)
8229 {
8230 case UNLE:
8231 case UNLT:
8232 case LTGT:
8233 case GT:
8234 case GE:
8235 case UNORDERED:
8236 case ORDERED:
8237 case UNEQ:
8238 return 4;
8239 break;
8240 case LT:
8241 case NE:
8242 case EQ:
8243 case UNGE:
8244 return 5;
8245 break;
8246 case LE:
8247 case UNGT:
8248 return 6;
8249 break;
8250 default:
8251 abort ();
8252 }
8253}
8254
8255/* Return cost of comparison done using fcomi operation.
8256 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8257static int
8258ix86_fp_comparison_fcomi_cost (code)
8259 enum rtx_code code;
8260{
8261 enum rtx_code bypass_code, first_code, second_code;
8262 /* Return arbitarily high cost when instruction is not supported - this
8263 prevents gcc from using it. */
8264 if (!TARGET_CMOVE)
8265 return 1024;
8266 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8267 return (bypass_code != NIL || second_code != NIL) + 2;
8268}
8269
8270/* Return cost of comparison done using sahf operation.
8271 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8272static int
8273ix86_fp_comparison_sahf_cost (code)
8274 enum rtx_code code;
8275{
8276 enum rtx_code bypass_code, first_code, second_code;
8277 /* Return arbitarily high cost when instruction is not preferred - this
8278 avoids gcc from using it. */
8279 if (!TARGET_USE_SAHF && !optimize_size)
8280 return 1024;
8281 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8282 return (bypass_code != NIL || second_code != NIL) + 3;
8283}
8284
8285/* Compute cost of the comparison done using any method.
8286 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8287static int
8288ix86_fp_comparison_cost (code)
8289 enum rtx_code code;
8290{
8291 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8292 int min;
8293
8294 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8295 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8296
8297 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8298 if (min > sahf_cost)
8299 min = sahf_cost;
8300 if (min > fcomi_cost)
8301 min = fcomi_cost;
8302 return min;
8303}
c0c102a9 8304
3a3677ff
RH
8305/* Generate insn patterns to do a floating point compare of OPERANDS. */
8306
9e7adcb3
JH
8307static rtx
8308ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
3a3677ff
RH
8309 enum rtx_code code;
8310 rtx op0, op1, scratch;
9e7adcb3
JH
8311 rtx *second_test;
8312 rtx *bypass_test;
3a3677ff
RH
8313{
8314 enum machine_mode fpcmp_mode, intcmp_mode;
c0c102a9 8315 rtx tmp, tmp2;
9e7adcb3 8316 int cost = ix86_fp_comparison_cost (code);
c0c102a9 8317 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
8318
8319 fpcmp_mode = ix86_fp_compare_mode (code);
8320 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8321
9e7adcb3
JH
8322 if (second_test)
8323 *second_test = NULL_RTX;
8324 if (bypass_test)
8325 *bypass_test = NULL_RTX;
8326
c0c102a9
JH
8327 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8328
9e7adcb3
JH
8329 /* Do fcomi/sahf based test when profitable. */
8330 if ((bypass_code == NIL || bypass_test)
8331 && (second_code == NIL || second_test)
8332 && ix86_fp_comparison_arithmetics_cost (code) > cost)
32b5b1aa 8333 {
c0c102a9
JH
8334 if (TARGET_CMOVE)
8335 {
8336 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8337 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8338 tmp);
8339 emit_insn (tmp);
8340 }
8341 else
8342 {
8343 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8ee41eaf 8344 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
bf71a4f8
JH
8345 if (!scratch)
8346 scratch = gen_reg_rtx (HImode);
c0c102a9
JH
8347 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8348 emit_insn (gen_x86_sahf_1 (scratch));
8349 }
e075ae69
RH
8350
8351 /* The FP codes work out to act like unsigned. */
9a915772 8352 intcmp_mode = fpcmp_mode;
9e7adcb3
JH
8353 code = first_code;
8354 if (bypass_code != NIL)
8355 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8356 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8357 const0_rtx);
8358 if (second_code != NIL)
8359 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8360 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8361 const0_rtx);
e075ae69
RH
8362 }
8363 else
8364 {
8365 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
e075ae69 8366 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8ee41eaf 8367 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
bf71a4f8
JH
8368 if (!scratch)
8369 scratch = gen_reg_rtx (HImode);
3a3677ff 8370 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
e075ae69 8371
9a915772
JH
8372 /* In the unordered case, we have to check C2 for NaN's, which
8373 doesn't happen to work out to anything nice combination-wise.
8374 So do some bit twiddling on the value we've got in AH to come
8375 up with an appropriate set of condition codes. */
e075ae69 8376
9a915772
JH
8377 intcmp_mode = CCNOmode;
8378 switch (code)
32b5b1aa 8379 {
9a915772
JH
8380 case GT:
8381 case UNGT:
8382 if (code == GT || !TARGET_IEEE_FP)
32b5b1aa 8383 {
3a3677ff 8384 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
e075ae69 8385 code = EQ;
9a915772
JH
8386 }
8387 else
8388 {
8389 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8390 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8391 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8392 intcmp_mode = CCmode;
8393 code = GEU;
8394 }
8395 break;
8396 case LT:
8397 case UNLT:
8398 if (code == LT && TARGET_IEEE_FP)
8399 {
3a3677ff
RH
8400 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8401 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
e075ae69
RH
8402 intcmp_mode = CCmode;
8403 code = EQ;
9a915772
JH
8404 }
8405 else
8406 {
8407 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8408 code = NE;
8409 }
8410 break;
8411 case GE:
8412 case UNGE:
8413 if (code == GE || !TARGET_IEEE_FP)
8414 {
3a3677ff 8415 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
e075ae69 8416 code = EQ;
9a915772
JH
8417 }
8418 else
8419 {
8420 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8421 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8422 GEN_INT (0x01)));
8423 code = NE;
8424 }
8425 break;
8426 case LE:
8427 case UNLE:
8428 if (code == LE && TARGET_IEEE_FP)
8429 {
3a3677ff
RH
8430 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8431 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8432 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
8433 intcmp_mode = CCmode;
8434 code = LTU;
9a915772
JH
8435 }
8436 else
8437 {
8438 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8439 code = NE;
8440 }
8441 break;
8442 case EQ:
8443 case UNEQ:
8444 if (code == EQ && TARGET_IEEE_FP)
8445 {
3a3677ff
RH
8446 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8447 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
8448 intcmp_mode = CCmode;
8449 code = EQ;
9a915772
JH
8450 }
8451 else
8452 {
3a3677ff
RH
8453 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8454 code = NE;
8455 break;
9a915772
JH
8456 }
8457 break;
8458 case NE:
8459 case LTGT:
8460 if (code == NE && TARGET_IEEE_FP)
8461 {
3a3677ff 8462 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9a915772
JH
8463 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8464 GEN_INT (0x40)));
3a3677ff 8465 code = NE;
9a915772
JH
8466 }
8467 else
8468 {
3a3677ff
RH
8469 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8470 code = EQ;
32b5b1aa 8471 }
9a915772
JH
8472 break;
8473
8474 case UNORDERED:
8475 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8476 code = NE;
8477 break;
8478 case ORDERED:
8479 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8480 code = EQ;
8481 break;
8482
8483 default:
8484 abort ();
32b5b1aa 8485 }
32b5b1aa 8486 }
e075ae69
RH
8487
8488 /* Return the test that should be put into the flags user, i.e.
8489 the bcc, scc, or cmov instruction. */
8490 return gen_rtx_fmt_ee (code, VOIDmode,
8491 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8492 const0_rtx);
8493}
8494
9e3e266c 8495rtx
a1b8572c 8496ix86_expand_compare (code, second_test, bypass_test)
e075ae69 8497 enum rtx_code code;
a1b8572c 8498 rtx *second_test, *bypass_test;
e075ae69
RH
8499{
8500 rtx op0, op1, ret;
8501 op0 = ix86_compare_op0;
8502 op1 = ix86_compare_op1;
8503
a1b8572c
JH
8504 if (second_test)
8505 *second_test = NULL_RTX;
8506 if (bypass_test)
8507 *bypass_test = NULL_RTX;
8508
e075ae69 8509 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
bf71a4f8 8510 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
77ebd435 8511 second_test, bypass_test);
32b5b1aa 8512 else
e075ae69
RH
8513 ret = ix86_expand_int_compare (code, op0, op1);
8514
8515 return ret;
8516}
8517
03598dea
JH
8518/* Return true if the CODE will result in nontrivial jump sequence. */
8519bool
8520ix86_fp_jump_nontrivial_p (code)
8521 enum rtx_code code;
8522{
8523 enum rtx_code bypass_code, first_code, second_code;
8524 if (!TARGET_CMOVE)
8525 return true;
8526 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8527 return bypass_code != NIL || second_code != NIL;
8528}
8529
e075ae69 8530void
3a3677ff 8531ix86_expand_branch (code, label)
e075ae69 8532 enum rtx_code code;
e075ae69
RH
8533 rtx label;
8534{
3a3677ff 8535 rtx tmp;
e075ae69 8536
3a3677ff 8537 switch (GET_MODE (ix86_compare_op0))
32b5b1aa 8538 {
3a3677ff
RH
8539 case QImode:
8540 case HImode:
8541 case SImode:
0d7d98ee 8542 simple:
a1b8572c 8543 tmp = ix86_expand_compare (code, NULL, NULL);
e075ae69
RH
8544 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8545 gen_rtx_LABEL_REF (VOIDmode, label),
8546 pc_rtx);
8547 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32b5b1aa 8548 return;
e075ae69 8549
3a3677ff
RH
8550 case SFmode:
8551 case DFmode:
0f290768 8552 case XFmode:
2b589241 8553 case TFmode:
3a3677ff
RH
8554 {
8555 rtvec vec;
8556 int use_fcomi;
03598dea 8557 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
8558
8559 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8560 &ix86_compare_op1);
fce5a9f2 8561
03598dea
JH
8562 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8563
8564 /* Check whether we will use the natural sequence with one jump. If
8565 so, we can expand jump early. Otherwise delay expansion by
8566 creating compound insn to not confuse optimizers. */
8567 if (bypass_code == NIL && second_code == NIL
8568 && TARGET_CMOVE)
8569 {
8570 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8571 gen_rtx_LABEL_REF (VOIDmode, label),
8572 pc_rtx, NULL_RTX);
8573 }
8574 else
8575 {
8576 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8577 ix86_compare_op0, ix86_compare_op1);
8578 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8579 gen_rtx_LABEL_REF (VOIDmode, label),
8580 pc_rtx);
8581 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8582
8583 use_fcomi = ix86_use_fcomi_compare (code);
8584 vec = rtvec_alloc (3 + !use_fcomi);
8585 RTVEC_ELT (vec, 0) = tmp;
8586 RTVEC_ELT (vec, 1)
8587 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8588 RTVEC_ELT (vec, 2)
8589 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8590 if (! use_fcomi)
8591 RTVEC_ELT (vec, 3)
8592 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8593
8594 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8595 }
3a3677ff
RH
8596 return;
8597 }
32b5b1aa 8598
3a3677ff 8599 case DImode:
0d7d98ee
JH
8600 if (TARGET_64BIT)
8601 goto simple;
3a3677ff
RH
8602 /* Expand DImode branch into multiple compare+branch. */
8603 {
8604 rtx lo[2], hi[2], label2;
8605 enum rtx_code code1, code2, code3;
32b5b1aa 8606
3a3677ff
RH
8607 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8608 {
8609 tmp = ix86_compare_op0;
8610 ix86_compare_op0 = ix86_compare_op1;
8611 ix86_compare_op1 = tmp;
8612 code = swap_condition (code);
8613 }
8614 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8615 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
32b5b1aa 8616
3a3677ff
RH
8617 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8618 avoid two branches. This costs one extra insn, so disable when
8619 optimizing for size. */
32b5b1aa 8620
3a3677ff
RH
8621 if ((code == EQ || code == NE)
8622 && (!optimize_size
8623 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8624 {
8625 rtx xor0, xor1;
32b5b1aa 8626
3a3677ff
RH
8627 xor1 = hi[0];
8628 if (hi[1] != const0_rtx)
8629 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8630 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 8631
3a3677ff
RH
8632 xor0 = lo[0];
8633 if (lo[1] != const0_rtx)
8634 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8635 NULL_RTX, 0, OPTAB_WIDEN);
e075ae69 8636
3a3677ff
RH
8637 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8638 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 8639
3a3677ff
RH
8640 ix86_compare_op0 = tmp;
8641 ix86_compare_op1 = const0_rtx;
8642 ix86_expand_branch (code, label);
8643 return;
8644 }
e075ae69 8645
1f9124e4
JJ
8646 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8647 op1 is a constant and the low word is zero, then we can just
8648 examine the high word. */
32b5b1aa 8649
1f9124e4
JJ
8650 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8651 switch (code)
8652 {
8653 case LT: case LTU: case GE: case GEU:
8654 ix86_compare_op0 = hi[0];
8655 ix86_compare_op1 = hi[1];
8656 ix86_expand_branch (code, label);
8657 return;
8658 default:
8659 break;
8660 }
e075ae69 8661
3a3677ff 8662 /* Otherwise, we need two or three jumps. */
e075ae69 8663
3a3677ff 8664 label2 = gen_label_rtx ();
e075ae69 8665
3a3677ff
RH
8666 code1 = code;
8667 code2 = swap_condition (code);
8668 code3 = unsigned_condition (code);
e075ae69 8669
3a3677ff
RH
8670 switch (code)
8671 {
8672 case LT: case GT: case LTU: case GTU:
8673 break;
e075ae69 8674
3a3677ff
RH
8675 case LE: code1 = LT; code2 = GT; break;
8676 case GE: code1 = GT; code2 = LT; break;
8677 case LEU: code1 = LTU; code2 = GTU; break;
8678 case GEU: code1 = GTU; code2 = LTU; break;
e075ae69 8679
3a3677ff
RH
8680 case EQ: code1 = NIL; code2 = NE; break;
8681 case NE: code2 = NIL; break;
e075ae69 8682
3a3677ff
RH
8683 default:
8684 abort ();
8685 }
e075ae69 8686
3a3677ff
RH
8687 /*
8688 * a < b =>
8689 * if (hi(a) < hi(b)) goto true;
8690 * if (hi(a) > hi(b)) goto false;
8691 * if (lo(a) < lo(b)) goto true;
8692 * false:
8693 */
8694
8695 ix86_compare_op0 = hi[0];
8696 ix86_compare_op1 = hi[1];
8697
8698 if (code1 != NIL)
8699 ix86_expand_branch (code1, label);
8700 if (code2 != NIL)
8701 ix86_expand_branch (code2, label2);
8702
8703 ix86_compare_op0 = lo[0];
8704 ix86_compare_op1 = lo[1];
8705 ix86_expand_branch (code3, label);
8706
8707 if (code2 != NIL)
8708 emit_label (label2);
8709 return;
8710 }
e075ae69 8711
3a3677ff
RH
8712 default:
8713 abort ();
8714 }
32b5b1aa 8715}
e075ae69 8716
9e7adcb3
JH
8717/* Split branch based on floating point condition. */
8718void
03598dea
JH
8719ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
8720 enum rtx_code code;
8721 rtx op1, op2, target1, target2, tmp;
9e7adcb3
JH
8722{
8723 rtx second, bypass;
8724 rtx label = NULL_RTX;
03598dea 8725 rtx condition;
6b24c259
JH
8726 int bypass_probability = -1, second_probability = -1, probability = -1;
8727 rtx i;
9e7adcb3
JH
8728
8729 if (target2 != pc_rtx)
8730 {
8731 rtx tmp = target2;
8732 code = reverse_condition_maybe_unordered (code);
8733 target2 = target1;
8734 target1 = tmp;
8735 }
8736
8737 condition = ix86_expand_fp_compare (code, op1, op2,
8738 tmp, &second, &bypass);
6b24c259
JH
8739
8740 if (split_branch_probability >= 0)
8741 {
8742 /* Distribute the probabilities across the jumps.
8743 Assume the BYPASS and SECOND to be always test
8744 for UNORDERED. */
8745 probability = split_branch_probability;
8746
d6a7951f 8747 /* Value of 1 is low enough to make no need for probability
6b24c259
JH
8748 to be updated. Later we may run some experiments and see
8749 if unordered values are more frequent in practice. */
8750 if (bypass)
8751 bypass_probability = 1;
8752 if (second)
8753 second_probability = 1;
8754 }
9e7adcb3
JH
8755 if (bypass != NULL_RTX)
8756 {
8757 label = gen_label_rtx ();
6b24c259
JH
8758 i = emit_jump_insn (gen_rtx_SET
8759 (VOIDmode, pc_rtx,
8760 gen_rtx_IF_THEN_ELSE (VOIDmode,
8761 bypass,
8762 gen_rtx_LABEL_REF (VOIDmode,
8763 label),
8764 pc_rtx)));
8765 if (bypass_probability >= 0)
8766 REG_NOTES (i)
8767 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8768 GEN_INT (bypass_probability),
8769 REG_NOTES (i));
8770 }
8771 i = emit_jump_insn (gen_rtx_SET
9e7adcb3
JH
8772 (VOIDmode, pc_rtx,
8773 gen_rtx_IF_THEN_ELSE (VOIDmode,
6b24c259
JH
8774 condition, target1, target2)));
8775 if (probability >= 0)
8776 REG_NOTES (i)
8777 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8778 GEN_INT (probability),
8779 REG_NOTES (i));
8780 if (second != NULL_RTX)
9e7adcb3 8781 {
6b24c259
JH
8782 i = emit_jump_insn (gen_rtx_SET
8783 (VOIDmode, pc_rtx,
8784 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8785 target2)));
8786 if (second_probability >= 0)
8787 REG_NOTES (i)
8788 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8789 GEN_INT (second_probability),
8790 REG_NOTES (i));
9e7adcb3 8791 }
9e7adcb3
JH
8792 if (label != NULL_RTX)
8793 emit_label (label);
8794}
8795
32b5b1aa 8796int
3a3677ff 8797ix86_expand_setcc (code, dest)
e075ae69 8798 enum rtx_code code;
e075ae69 8799 rtx dest;
32b5b1aa 8800{
a1b8572c
JH
8801 rtx ret, tmp, tmpreg;
8802 rtx second_test, bypass_test;
e075ae69 8803
885a70fd
JH
8804 if (GET_MODE (ix86_compare_op0) == DImode
8805 && !TARGET_64BIT)
e075ae69
RH
8806 return 0; /* FAIL */
8807
b932f770
JH
8808 if (GET_MODE (dest) != QImode)
8809 abort ();
e075ae69 8810
a1b8572c 8811 ret = ix86_expand_compare (code, &second_test, &bypass_test);
e075ae69
RH
8812 PUT_MODE (ret, QImode);
8813
8814 tmp = dest;
a1b8572c 8815 tmpreg = dest;
32b5b1aa 8816
e075ae69 8817 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
a1b8572c
JH
8818 if (bypass_test || second_test)
8819 {
8820 rtx test = second_test;
8821 int bypass = 0;
8822 rtx tmp2 = gen_reg_rtx (QImode);
8823 if (bypass_test)
8824 {
8825 if (second_test)
b531087a 8826 abort ();
a1b8572c
JH
8827 test = bypass_test;
8828 bypass = 1;
8829 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8830 }
8831 PUT_MODE (test, QImode);
8832 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8833
8834 if (bypass)
8835 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8836 else
8837 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8838 }
e075ae69 8839
e075ae69 8840 return 1; /* DONE */
32b5b1aa 8841}
e075ae69 8842
32b5b1aa 8843int
e075ae69
RH
8844ix86_expand_int_movcc (operands)
8845 rtx operands[];
32b5b1aa 8846{
e075ae69
RH
8847 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8848 rtx compare_seq, compare_op;
a1b8572c 8849 rtx second_test, bypass_test;
635559ab 8850 enum machine_mode mode = GET_MODE (operands[0]);
32b5b1aa 8851
36583fea
JH
8852 /* When the compare code is not LTU or GEU, we can not use sbbl case.
8853 In case comparsion is done with immediate, we can convert it to LTU or
8854 GEU by altering the integer. */
8855
8856 if ((code == LEU || code == GTU)
8857 && GET_CODE (ix86_compare_op1) == CONST_INT
635559ab 8858 && mode != HImode
261376e7
RH
8859 && INTVAL (ix86_compare_op1) != -1
8860 /* For x86-64, the immediate field in the instruction is 32-bit
8861 signed, so we can't increment a DImode value above 0x7fffffff. */
74411039
JH
8862 && (!TARGET_64BIT
8863 || GET_MODE (ix86_compare_op0) != DImode
261376e7 8864 || INTVAL (ix86_compare_op1) != 0x7fffffff)
0f290768 8865 && GET_CODE (operands[2]) == CONST_INT
36583fea
JH
8866 && GET_CODE (operands[3]) == CONST_INT)
8867 {
8868 if (code == LEU)
8869 code = LTU;
8870 else
8871 code = GEU;
261376e7
RH
8872 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8873 GET_MODE (ix86_compare_op0));
36583fea 8874 }
3a3677ff 8875
e075ae69 8876 start_sequence ();
a1b8572c 8877 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
2f937369 8878 compare_seq = get_insns ();
e075ae69
RH
8879 end_sequence ();
8880
8881 compare_code = GET_CODE (compare_op);
8882
8883 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8884 HImode insns, we'd be swallowed in word prefix ops. */
8885
635559ab
JH
8886 if (mode != HImode
8887 && (mode != DImode || TARGET_64BIT)
0f290768 8888 && GET_CODE (operands[2]) == CONST_INT
e075ae69
RH
8889 && GET_CODE (operands[3]) == CONST_INT)
8890 {
8891 rtx out = operands[0];
8892 HOST_WIDE_INT ct = INTVAL (operands[2]);
8893 HOST_WIDE_INT cf = INTVAL (operands[3]);
8894 HOST_WIDE_INT diff;
8895
a1b8572c
JH
8896 if ((compare_code == LTU || compare_code == GEU)
8897 && !second_test && !bypass_test)
e075ae69 8898 {
e075ae69
RH
8899 /* Detect overlap between destination and compare sources. */
8900 rtx tmp = out;
8901
0f290768 8902 /* To simplify rest of code, restrict to the GEU case. */
36583fea
JH
8903 if (compare_code == LTU)
8904 {
8905 int tmp = ct;
8906 ct = cf;
8907 cf = tmp;
8908 compare_code = reverse_condition (compare_code);
8909 code = reverse_condition (code);
8910 }
8911 diff = ct - cf;
8912
e075ae69 8913 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
a500c31b 8914 || reg_overlap_mentioned_p (out, ix86_compare_op1))
635559ab 8915 tmp = gen_reg_rtx (mode);
e075ae69
RH
8916
8917 emit_insn (compare_seq);
635559ab 8918 if (mode == DImode)
14f73b5a
JH
8919 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8920 else
8921 emit_insn (gen_x86_movsicc_0_m1 (tmp));
e075ae69 8922
36583fea
JH
8923 if (diff == 1)
8924 {
8925 /*
8926 * cmpl op0,op1
8927 * sbbl dest,dest
8928 * [addl dest, ct]
8929 *
8930 * Size 5 - 8.
8931 */
8932 if (ct)
635559ab
JH
8933 tmp = expand_simple_binop (mode, PLUS,
8934 tmp, GEN_INT (ct),
8935 tmp, 1, OPTAB_DIRECT);
36583fea
JH
8936 }
8937 else if (cf == -1)
8938 {
8939 /*
8940 * cmpl op0,op1
8941 * sbbl dest,dest
8942 * orl $ct, dest
8943 *
8944 * Size 8.
8945 */
635559ab
JH
8946 tmp = expand_simple_binop (mode, IOR,
8947 tmp, GEN_INT (ct),
8948 tmp, 1, OPTAB_DIRECT);
36583fea
JH
8949 }
8950 else if (diff == -1 && ct)
8951 {
8952 /*
8953 * cmpl op0,op1
8954 * sbbl dest,dest
06ec023f 8955 * notl dest
36583fea
JH
8956 * [addl dest, cf]
8957 *
8958 * Size 8 - 11.
8959 */
635559ab
JH
8960 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8961 if (cf)
8962 tmp = expand_simple_binop (mode, PLUS,
8963 tmp, GEN_INT (cf),
8964 tmp, 1, OPTAB_DIRECT);
36583fea
JH
8965 }
8966 else
8967 {
8968 /*
8969 * cmpl op0,op1
8970 * sbbl dest,dest
06ec023f 8971 * [notl dest]
36583fea
JH
8972 * andl cf - ct, dest
8973 * [addl dest, ct]
8974 *
8975 * Size 8 - 11.
8976 */
06ec023f
RB
8977
8978 if (cf == 0)
8979 {
8980 cf = ct;
8981 ct = 0;
8982 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8983 }
8984
635559ab
JH
8985 tmp = expand_simple_binop (mode, AND,
8986 tmp,
d8bf17f9 8987 gen_int_mode (cf - ct, mode),
635559ab
JH
8988 tmp, 1, OPTAB_DIRECT);
8989 if (ct)
8990 tmp = expand_simple_binop (mode, PLUS,
8991 tmp, GEN_INT (ct),
8992 tmp, 1, OPTAB_DIRECT);
36583fea 8993 }
e075ae69
RH
8994
8995 if (tmp != out)
8996 emit_move_insn (out, tmp);
8997
8998 return 1; /* DONE */
8999 }
9000
9001 diff = ct - cf;
9002 if (diff < 0)
9003 {
9004 HOST_WIDE_INT tmp;
9005 tmp = ct, ct = cf, cf = tmp;
9006 diff = -diff;
734dba19
JH
9007 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9008 {
9009 /* We may be reversing unordered compare to normal compare, that
9010 is not valid in general (we may convert non-trapping condition
9011 to trapping one), however on i386 we currently emit all
9012 comparisons unordered. */
9013 compare_code = reverse_condition_maybe_unordered (compare_code);
9014 code = reverse_condition_maybe_unordered (code);
9015 }
9016 else
9017 {
9018 compare_code = reverse_condition (compare_code);
9019 code = reverse_condition (code);
9020 }
e075ae69 9021 }
0f2a3457
JJ
9022
9023 compare_code = NIL;
9024 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9025 && GET_CODE (ix86_compare_op1) == CONST_INT)
9026 {
9027 if (ix86_compare_op1 == const0_rtx
9028 && (code == LT || code == GE))
9029 compare_code = code;
9030 else if (ix86_compare_op1 == constm1_rtx)
9031 {
9032 if (code == LE)
9033 compare_code = LT;
9034 else if (code == GT)
9035 compare_code = GE;
9036 }
9037 }
9038
9039 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9040 if (compare_code != NIL
9041 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9042 && (cf == -1 || ct == -1))
9043 {
9044 /* If lea code below could be used, only optimize
9045 if it results in a 2 insn sequence. */
9046
9047 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9048 || diff == 3 || diff == 5 || diff == 9)
9049 || (compare_code == LT && ct == -1)
9050 || (compare_code == GE && cf == -1))
9051 {
9052 /*
9053 * notl op1 (if necessary)
9054 * sarl $31, op1
9055 * orl cf, op1
9056 */
9057 if (ct != -1)
9058 {
9059 cf = ct;
9060 ct = -1;
9061 code = reverse_condition (code);
9062 }
9063
9064 out = emit_store_flag (out, code, ix86_compare_op0,
9065 ix86_compare_op1, VOIDmode, 0, -1);
9066
9067 out = expand_simple_binop (mode, IOR,
9068 out, GEN_INT (cf),
9069 out, 1, OPTAB_DIRECT);
9070 if (out != operands[0])
9071 emit_move_insn (operands[0], out);
9072
9073 return 1; /* DONE */
9074 }
9075 }
9076
635559ab
JH
9077 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9078 || diff == 3 || diff == 5 || diff == 9)
9079 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
e075ae69
RH
9080 {
9081 /*
9082 * xorl dest,dest
9083 * cmpl op1,op2
9084 * setcc dest
9085 * lea cf(dest*(ct-cf)),dest
9086 *
9087 * Size 14.
9088 *
9089 * This also catches the degenerate setcc-only case.
9090 */
9091
9092 rtx tmp;
9093 int nops;
9094
9095 out = emit_store_flag (out, code, ix86_compare_op0,
9096 ix86_compare_op1, VOIDmode, 0, 1);
9097
9098 nops = 0;
97f51ac4
RB
9099 /* On x86_64 the lea instruction operates on Pmode, so we need
9100 to get arithmetics done in proper mode to match. */
e075ae69 9101 if (diff == 1)
14f73b5a 9102 tmp = out;
e075ae69
RH
9103 else
9104 {
885a70fd 9105 rtx out1;
14f73b5a 9106 out1 = out;
635559ab 9107 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
e075ae69
RH
9108 nops++;
9109 if (diff & 1)
9110 {
635559ab 9111 tmp = gen_rtx_PLUS (mode, tmp, out1);
e075ae69
RH
9112 nops++;
9113 }
9114 }
9115 if (cf != 0)
9116 {
635559ab 9117 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
e075ae69
RH
9118 nops++;
9119 }
885a70fd
JH
9120 if (tmp != out
9121 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
e075ae69 9122 {
14f73b5a 9123 if (nops == 1)
e075ae69
RH
9124 {
9125 rtx clob;
9126
9127 clob = gen_rtx_REG (CCmode, FLAGS_REG);
9128 clob = gen_rtx_CLOBBER (VOIDmode, clob);
9129
9130 tmp = gen_rtx_SET (VOIDmode, out, tmp);
9131 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9132 emit_insn (tmp);
9133 }
9134 else
9135 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
9136 }
9137 if (out != operands[0])
1985ef90 9138 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
9139
9140 return 1; /* DONE */
9141 }
9142
9143 /*
9144 * General case: Jumpful:
9145 * xorl dest,dest cmpl op1, op2
9146 * cmpl op1, op2 movl ct, dest
9147 * setcc dest jcc 1f
9148 * decl dest movl cf, dest
9149 * andl (cf-ct),dest 1:
9150 * addl ct,dest
0f290768 9151 *
e075ae69
RH
9152 * Size 20. Size 14.
9153 *
9154 * This is reasonably steep, but branch mispredict costs are
9155 * high on modern cpus, so consider failing only if optimizing
9156 * for space.
9157 *
9158 * %%% Parameterize branch_cost on the tuning architecture, then
9159 * use that. The 80386 couldn't care less about mispredicts.
9160 */
9161
9162 if (!optimize_size && !TARGET_CMOVE)
9163 {
97f51ac4 9164 if (cf == 0)
e075ae69 9165 {
97f51ac4
RB
9166 cf = ct;
9167 ct = 0;
734dba19 9168 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
0f2a3457
JJ
9169 /* We may be reversing unordered compare to normal compare,
9170 that is not valid in general (we may convert non-trapping
9171 condition to trapping one), however on i386 we currently
9172 emit all comparisons unordered. */
9173 code = reverse_condition_maybe_unordered (code);
9174 else
9175 {
9176 code = reverse_condition (code);
9177 if (compare_code != NIL)
9178 compare_code = reverse_condition (compare_code);
9179 }
9180 }
9181
9182 if (compare_code != NIL)
9183 {
9184 /* notl op1 (if needed)
9185 sarl $31, op1
9186 andl (cf-ct), op1
9187 addl ct, op1
9188
9189 For x < 0 (resp. x <= -1) there will be no notl,
9190 so if possible swap the constants to get rid of the
9191 complement.
9192 True/false will be -1/0 while code below (store flag
9193 followed by decrement) is 0/-1, so the constants need
9194 to be exchanged once more. */
9195
9196 if (compare_code == GE || !cf)
734dba19 9197 {
0f2a3457
JJ
9198 code = reverse_condition (code);
9199 compare_code = LT;
734dba19
JH
9200 }
9201 else
9202 {
0f2a3457
JJ
9203 HOST_WIDE_INT tmp = cf;
9204 cf = ct;
9205 ct = tmp;
734dba19 9206 }
0f2a3457
JJ
9207
9208 out = emit_store_flag (out, code, ix86_compare_op0,
9209 ix86_compare_op1, VOIDmode, 0, -1);
e075ae69 9210 }
0f2a3457
JJ
9211 else
9212 {
9213 out = emit_store_flag (out, code, ix86_compare_op0,
9214 ix86_compare_op1, VOIDmode, 0, 1);
e075ae69 9215
97f51ac4 9216 out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
0f2a3457
JJ
9217 out, 1, OPTAB_DIRECT);
9218 }
e075ae69 9219
97f51ac4 9220 out = expand_simple_binop (mode, AND, out,
d8bf17f9 9221 gen_int_mode (cf - ct, mode),
635559ab 9222 out, 1, OPTAB_DIRECT);
97f51ac4
RB
9223 if (ct)
9224 out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
9225 out, 1, OPTAB_DIRECT);
e075ae69
RH
9226 if (out != operands[0])
9227 emit_move_insn (operands[0], out);
9228
9229 return 1; /* DONE */
9230 }
9231 }
9232
9233 if (!TARGET_CMOVE)
9234 {
9235 /* Try a few things more with specific constants and a variable. */
9236
78a0d70c 9237 optab op;
e075ae69
RH
9238 rtx var, orig_out, out, tmp;
9239
9240 if (optimize_size)
9241 return 0; /* FAIL */
9242
0f290768 9243 /* If one of the two operands is an interesting constant, load a
e075ae69 9244 constant with the above and mask it in with a logical operation. */
0f290768 9245
e075ae69
RH
9246 if (GET_CODE (operands[2]) == CONST_INT)
9247 {
9248 var = operands[3];
9249 if (INTVAL (operands[2]) == 0)
9250 operands[3] = constm1_rtx, op = and_optab;
9251 else if (INTVAL (operands[2]) == -1)
9252 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
9253 else
9254 return 0; /* FAIL */
e075ae69
RH
9255 }
9256 else if (GET_CODE (operands[3]) == CONST_INT)
9257 {
9258 var = operands[2];
9259 if (INTVAL (operands[3]) == 0)
9260 operands[2] = constm1_rtx, op = and_optab;
9261 else if (INTVAL (operands[3]) == -1)
9262 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
9263 else
9264 return 0; /* FAIL */
e075ae69 9265 }
78a0d70c 9266 else
e075ae69
RH
9267 return 0; /* FAIL */
9268
9269 orig_out = operands[0];
635559ab 9270 tmp = gen_reg_rtx (mode);
e075ae69
RH
9271 operands[0] = tmp;
9272
9273 /* Recurse to get the constant loaded. */
9274 if (ix86_expand_int_movcc (operands) == 0)
9275 return 0; /* FAIL */
9276
9277 /* Mask in the interesting variable. */
635559ab 9278 out = expand_binop (mode, op, var, tmp, orig_out, 0,
e075ae69
RH
9279 OPTAB_WIDEN);
9280 if (out != orig_out)
9281 emit_move_insn (orig_out, out);
9282
9283 return 1; /* DONE */
9284 }
9285
9286 /*
9287 * For comparison with above,
9288 *
9289 * movl cf,dest
9290 * movl ct,tmp
9291 * cmpl op1,op2
9292 * cmovcc tmp,dest
9293 *
9294 * Size 15.
9295 */
9296
635559ab
JH
9297 if (! nonimmediate_operand (operands[2], mode))
9298 operands[2] = force_reg (mode, operands[2]);
9299 if (! nonimmediate_operand (operands[3], mode))
9300 operands[3] = force_reg (mode, operands[3]);
e075ae69 9301
a1b8572c
JH
9302 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9303 {
635559ab 9304 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
9305 emit_move_insn (tmp, operands[3]);
9306 operands[3] = tmp;
9307 }
9308 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9309 {
635559ab 9310 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
9311 emit_move_insn (tmp, operands[2]);
9312 operands[2] = tmp;
9313 }
c9682caf
JH
9314 if (! register_operand (operands[2], VOIDmode)
9315 && ! register_operand (operands[3], VOIDmode))
635559ab 9316 operands[2] = force_reg (mode, operands[2]);
a1b8572c 9317
e075ae69
RH
9318 emit_insn (compare_seq);
9319 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 9320 gen_rtx_IF_THEN_ELSE (mode,
e075ae69
RH
9321 compare_op, operands[2],
9322 operands[3])));
a1b8572c
JH
9323 if (bypass_test)
9324 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 9325 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c
JH
9326 bypass_test,
9327 operands[3],
9328 operands[0])));
9329 if (second_test)
9330 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 9331 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c
JH
9332 second_test,
9333 operands[2],
9334 operands[0])));
e075ae69
RH
9335
9336 return 1; /* DONE */
e9a25f70 9337}
e075ae69 9338
/* Expand a floating point conditional move:
     operands[0] = operands[1] (comparison of ix86_compare_op0/op1)
		   ? operands[2] : operands[3].
   Prefers SSE min/max or SSE masked-move sequences when available,
   otherwise falls back to fcmov.  Returns 1 when insns have been
   emitted (this expander does not fail).  */

int
ix86_expand_fp_movcc (operands)
     rtx operands[];
{
  enum rtx_code code;
  rtx tmp;
  rtx compare_op, second_test, bypass_test;

  /* For SF/DFmode conditional moves based on comparisons
     in same mode, we may want to use SSE min/max instructions.  */
  if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
       || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
      && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
      /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
      && (!TARGET_IEEE_FP
	  || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
      /* We may be called from the post-reload splitter.  */
      && (!REG_P (operands[0])
	  || SSE_REG_P (operands[0])
	  || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
    {
      rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
      code = GET_CODE (operands[1]);

      /* See if we have (cross) match between comparison operands and
	 conditional move operands.  */
      if (rtx_equal_p (operands[2], op1))
	{
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = reverse_condition_maybe_unordered (code);
	}
      if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
	{
	  /* Check for min operation.  */
	  if (code == LT)
	    {
	      operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	      if (memory_operand (op0, VOIDmode))
		op0 = force_reg (GET_MODE (operands[0]), op0);
	      if (GET_MODE (operands[0]) == SFmode)
		emit_insn (gen_minsf3 (operands[0], op0, op1));
	      else
		emit_insn (gen_mindf3 (operands[0], op0, op1));
	      return 1;
	    }
	  /* Check for max operation.  */
	  if (code == GT)
	    {
	      operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	      if (memory_operand (op0, VOIDmode))
		op0 = force_reg (GET_MODE (operands[0]), op0);
	      if (GET_MODE (operands[0]) == SFmode)
		emit_insn (gen_maxsf3 (operands[0], op0, op1));
	      else
		emit_insn (gen_maxdf3 (operands[0], op0, op1));
	      return 1;
	    }
	}
      /* Manage condition to be sse_comparison_operator.  In case we are
	 in non-ieee mode, try to canonicalize the destination operand
	 to be first in the comparison - this helps reload to avoid extra
	 moves.  */
      if (!sse_comparison_operator (operands[1], VOIDmode)
	  || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
	{
	  rtx tmp = ix86_compare_op0;
	  ix86_compare_op0 = ix86_compare_op1;
	  ix86_compare_op1 = tmp;
	  operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      /* Similarly try to manage result to be first operand of conditional
	 move.  We also don't support the NE comparison on SSE, so try to
	 avoid it.  */
      if ((rtx_equal_p (operands[0], operands[3])
	   && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
	  || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
	{
	  rtx tmp = operands[2];
	  operands[2] = operands[3];
	  operands[3] = tmp;
	  operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
					  (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      if (GET_MODE (operands[0]) == SFmode)
	emit_insn (gen_sse_movsfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      else
	emit_insn (gen_sse_movdfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      return 1;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  code = GET_CODE (operands[1]);
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  /* The floating point conditional move instructions don't directly
     support signed integer comparisons.  */

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      /* Materialize the condition with setcc and retest against zero,
	 which fcmov can handle.  */
      if (second_test != NULL || bypass_test != NULL)
	abort ();
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  /* The auxiliary cmovs below read operands[0]; copy overlapping
     sources out of the way first.  */
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						compare_op,
						operands[2],
						operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  bypass_test,
						  operands[3],
						  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  second_test,
						  operands[2],
						  operands[0])));

  return 1;
}
9492
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and nonoffsetable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.
   Returns the number of parts (2 or 3).  */

static int
ix86_split_to_parts (operand, parts, mode)
     rtx operand;
     rtx *parts;
     enum machine_mode mode;
{
  int size;

  /* Part count: word-sized pieces; on 32-bit, TFmode still uses only
     three 4-byte parts.  */
  if (!TARGET_64BIT)
    size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
	operand = tmp;
    }

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
	abort ();

      /* All three parts alias the same push expression; the caller
	 emits them in the right order.  */
      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      /* Consecutive hard registers; only valid after reload.  */
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 4);
	      if (size == 3)
		parts[2] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      /* Decompose the FP constant into target-format words.  */
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		case TFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  abort ();
		}
	      parts[1] = gen_int_mode (l[1], SImode);
	      parts[0] = gen_int_mode (l[0], SImode);
	    }
	  else
	    abort ();
	}
    }
  else
    {
      if (mode == TImode)
	split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  if (REG_P (operand))
	    {
	      if (!reload_completed)
		abort ();
	      /* Low 8 bytes in a DImode register, high 4 in SImode.  */
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[3];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[0]
		  = gen_int_mode
		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
		       DImode);
	      else
		parts[0] = immed_double_const (l[0], l[1], DImode);
	      parts[1] = gen_int_mode (l[2], SImode);
	    }
	  else
	    abort ();
	}
    }

  return size;
}
9628
/* Emit insns to perform a move or push of DI, DF, and XF values.
   (Note: despite the historical wording, this function is void and
   always emits the required insns itself.)  Operands 2-4 are used as
   scratch slots holding the destination parts in the correct order;
   operands 5-7 hold the corresponding source parts.  */

void
ix86_split_long_move (operands)
     rtx operands[];
{
  rtx part[2][3];
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves, that force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], Pmode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
	   && ! offsettable_memref_p (operands[0]))
    abort ();

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      /* Each earlier part is re-addressed relative to the next one, since
	 pushing moves the stack pointer between the part moves.  */
      if (nparts == 3)
	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
				     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
				   XEXP (part[1][1], 0));
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (nparts == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  collisions = 1;
	  emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
				  XEXP (part[1][0], 0)));
	  part[1][0] = change_address (part[1][0],
				       TARGET_64BIT ? DImode : SImode,
				       part[0][nparts - 1]);
	  part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
	  if (nparts == 3)
	    part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
	}
    }

  if (push)
    {
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      /* We use only first 12 bytes of TFmode value, but for pushing we
		 are required to adjust stack as if we were pushing real 16byte
		 value.  */
	      if (mode == TFmode && !TARGET_64BIT)
		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				       GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this is
	     register, it is OK - we will just use larger counterpart.  We also
	     retype memory - these comes from attempt to avoid REX prefix on
	     moving of second half of TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      if (GET_CODE (part[1][1]) == MEM)
		part[1][1] = adjust_address (part[1][1], DImode, 0);
	      else if (REG_P (part[1][1]))
		part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
	      else
		abort ();
	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      /* Pushes go from high part to low part.  */
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      /* Copy high-to-low.  */
      if (nparts == 3)
	{
	  operands[2] = part[0][2];
	  operands[3] = part[0][1];
	  operands[4] = part[0][0];
	  operands[5] = part[1][2];
	  operands[6] = part[1][1];
	  operands[7] = part[1][0];
	}
      else
	{
	  operands[2] = part[0][1];
	  operands[3] = part[0][0];
	  operands[5] = part[1][1];
	  operands[6] = part[1][0];
	}
    }
  else
    {
      /* Copy low-to-high.  */
      if (nparts == 3)
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[4] = part[0][2];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	  operands[7] = part[1][2];
	}
      else
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	}
    }
  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);

  return;
}
32b5b1aa 9816
/* Split a DImode arithmetic left shift into SImode operations:
   operands[0] = operands[1] << operands[2].  SCRATCH is an SImode
   register used for the variable-count adjustment when new pseudos
   cannot be created (e.g. after reload); it may be NULL when pseudos
   are still available.  */

void
ix86_split_ashldi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      /* Constant shift count: emit the exact 1-3 insn sequence.  */
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  /* Whole low word moves into the high word; low becomes zero.  */
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  /* shld shifts bits from the low word into the high word.  */
	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
	  emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
	}
    }
  else
    {
      /* Variable shift count: shld/shl only use the count mod 32, so an
	 extra adjustment handles counts of 32-63.  */
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  /* cmov-based adjustment needs a zero register.  */
	  if (! no_new_pseudos)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);

	  emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}
32b5b1aa 9869
/* Split a DImode arithmetic (sign-preserving) right shift into SImode
   instructions.  OPERANDS[0] is the destination, OPERANDS[1] the source
   and OPERANDS[2] the shift count.  SCRATCH is an optional SImode
   scratch register for the cmove-based fixup when no new pseudos may be
   created.  */

void
ix86_split_ashrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      /* Mask to 6 bits as the hardware does for 64-bit counts.  */
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  /* High word shifts down into the low word; the new high word
	     is a copy of the sign bit (arithmetic shift by 31).  */
	  emit_move_insn (low[0], high[1]);

	  /* Before reload we may still read low[0] while writing high[0];
	     afterwards play it safe and copy first.  */
	  if (! reload_completed)
	    emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
	  else
	    {
	      emit_move_insn (high[0], low[0]);
	      emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
	    }

	  if (count > 32)
	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  /* shrd pulls the low bits of the high word into the low word;
	     then shift the high word arithmetically.  */
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      /* Variable count.  */
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));

      /* Fix up counts >= 32: the scratch receives the sign extension
	 (high >> 31) to substitute as the new high word.  */
      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = gen_reg_rtx (SImode);
	  emit_move_insn (scratch, high[0]);
	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
    }
}
32b5b1aa 9928
/* Split a DImode logical (zero-filling) right shift into SImode
   instructions.  OPERANDS[0] is the destination, OPERANDS[1] the source
   and OPERANDS[2] the shift count.  SCRATCH is an optional SImode
   scratch register for the cmove fixup when no new pseudos may be
   created.  */

void
ix86_split_lshrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      /* Mask to 6 bits as the hardware does for 64-bit counts.  */
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  /* High word shifts into the low word; the high word is
	     zero-filled.  */
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  /* shrd pulls the low bits of the high word into the low word;
	     then shift the high word logically.  */
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      /* Variable count.  */
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);

	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
    }
}
3f803cd9 9982
/* Helper function for the string operations below.  Test VARIABLE for
   whether the low bits selected by the mask VALUE are zero (i.e. the
   value is aligned with respect to VALUE).  Emit the test and return a
   label that is jumped to when the bits ARE zero; the caller emits the
   unaligned fixup code before emitting the label itself.  */
static rtx
ix86_expand_aligntest (variable, value)
     rtx variable;
     int value;
{
  rtx label = gen_label_rtx ();
  /* Work on a temporary so VARIABLE itself is left untouched.  */
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
			   1, label);
  return label;
}
10000
10001/* Adjust COUNTER by the VALUE. */
10002static void
10003ix86_adjust_counter (countreg, value)
10004 rtx countreg;
10005 HOST_WIDE_INT value;
10006{
10007 if (GET_MODE (countreg) == DImode)
10008 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10009 else
10010 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10011}
10012
10013/* Zero extend possibly SImode EXP to Pmode register. */
d24b3457 10014rtx
0945b39d
JH
10015ix86_zero_extend_to_Pmode (exp)
10016 rtx exp;
10017{
10018 rtx r;
10019 if (GET_MODE (exp) == VOIDmode)
10020 return force_reg (Pmode, exp);
10021 if (GET_MODE (exp) == Pmode)
10022 return copy_to_mode_reg (Pmode, exp);
10023 r = gen_reg_rtx (Pmode);
10024 emit_insn (gen_zero_extendsidi2 (r, exp));
10025 return r;
10026}
10027
/* Expand string move (memcpy) operation.  Use i386 string operations
   when profitable.  expand_clrstr contains similar code.

   DST and SRC are the destination and source MEMs, COUNT_EXP the byte
   count and ALIGN_EXP the known alignment.  Returns 1 when the copy was
   expanded inline, 0 when the caller should fall back to the library
   call.  */
int
ix86_expand_movstr (dst, src, count_exp, align_exp)
     rtx dst, src, count_exp, align_exp;
{
  rtx srcreg, destreg, countreg;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;
  rtx insns;

  /* Collect everything in a sequence so we can throw it away on the
     library-call fallback path.  */
  start_sequence ();

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 64;

  if (GET_CODE (count_exp) == CONST_INT)
    count = INTVAL (count_exp);

  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  if (counter_mode != SImode && counter_mode != DImode)
    abort ();

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  emit_insn (gen_cld ());

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      if (TARGET_64BIT)
	emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
					destreg, srcreg, countreg));
      else
	emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
				  destreg, srcreg, countreg));
    }

  /* For constant aligned (or small unaligned) copies use rep movsl
     followed by code copying the rest.  For PentiumPro ensure 8 byte
     alignment to allow rep movsl acceleration.  */

  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      /* Word-sized chunks: 8 bytes on 64-bit unless optimizing for
	 size, otherwise 4.  */
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      if (count & ~(size - 1))
	{
	  countreg = copy_to_mode_reg (counter_mode,
				       GEN_INT ((count >> (size == 4 ? 2 : 3))
						& (TARGET_64BIT ? -1 : 0x3fffffff)));
	  countreg = ix86_zero_extend_to_Pmode (countreg);
	  if (size == 4)
	    {
	      if (TARGET_64BIT)
		emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
						destreg, srcreg, countreg));
	      else
		emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
					  destreg, srcreg, countreg));
	    }
	  else
	    emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
					    destreg, srcreg, countreg));
	}
      /* Copy the remaining tail bytes with single moves.  */
      if (size == 8 && (count & 0x04))
	emit_insn (gen_strmovsi (destreg, srcreg));
      if (count & 0x02)
	emit_insn (gen_strmovhi (destreg, srcreg));
      if (count & 0x01)
	emit_insn (gen_strmovqi (destreg, srcreg));
    }
  /* The generic code based on the glibc implementation:
     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
     allowing accelerated copying there)
     - copy the data using rep movsl
     - copy the rest.  */
  else
    {
      rtx countreg2;
      rtx label = NULL;
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.  */
      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
	{
	  end_sequence ();
	  return 0;
	}

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);

      /* We don't use loops to align destination and to copy parts smaller
	 than 4 bytes, because gcc is able to optimize such code better (in
	 the case the destination or the count really is aligned, gcc is often
	 able to predict the branches) and also it is friendlier to the
	 hardware branch prediction.

	 Using loops is benefical for generic case, because we can
	 handle small counts using the loops.  Many CPUs (such as Athlon)
	 have large REP prefix setup costs.

	 This is quite costy.  Maybe we can revisit this decision later or
	 add some customizability to this code.  */

      /* Skip the alignment prologue entirely when the (unknown) count is
	 small enough.  */
      if (count == 0 && align < desired_alignment)
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  emit_insn (gen_strmovqi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  emit_insn (gen_strmovhi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  emit_insn (gen_strmovsi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}
      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      /* Bulk copy: shift the byte count down to a word count and use
	 rep movs.  */
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
					  destreg, srcreg, countreg2));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
	  emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
				    destreg, srcreg, countreg2));
	}

      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      /* Copy the remaining sub-word tail, testing countreg at runtime
	 when the count is not a compile-time constant.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	emit_insn (gen_strmovsi (destreg, srcreg));
      if ((align <= 4 || count == 0) && TARGET_64BIT)
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  emit_insn (gen_strmovsi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	emit_insn (gen_strmovhi (destreg, srcreg));
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  emit_insn (gen_strmovhi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	emit_insn (gen_strmovqi (destreg, srcreg));
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  emit_insn (gen_strmovqi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }

  insns = get_insns ();
  end_sequence ();

  ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
  emit_insn (insns);
  return 1;
}
10254
/* Expand string clear operation (bzero).  Use i386 string operations
   when profitable.  expand_movstr contains similar code.

   SRC is the destination MEM (named src for historical symmetry with
   expand_movstr), COUNT_EXP the byte count and ALIGN_EXP the known
   alignment.  Returns 1 when expanded inline, 0 when the caller should
   use the library call.  */
int
ix86_expand_clrstr (src, count_exp, align_exp)
     rtx src, count_exp, align_exp;
{
  rtx destreg, zeroreg, countreg;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 32;

  if (GET_CODE (count_exp) == CONST_INT)
    count = INTVAL (count_exp);
  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  emit_insn (gen_cld ());

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
      if (TARGET_64BIT)
	emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
					 destreg, countreg));
      else
	emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
				   destreg, countreg));
    }
  /* For constant aligned (or small unaligned) clears use rep stosl
     followed by code clearing the rest.  */
  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      /* Word-sized chunks: 8 bytes on 64-bit unless optimizing for
	 size, otherwise 4.  */
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
      if (count & ~(size - 1))
	{
	  countreg = copy_to_mode_reg (counter_mode,
				       GEN_INT ((count >> (size == 4 ? 2 : 3))
						& (TARGET_64BIT ? -1 : 0x3fffffff)));
	  countreg = ix86_zero_extend_to_Pmode (countreg);
	  if (size == 4)
	    {
	      if (TARGET_64BIT)
		emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
						 destreg, countreg));
	      else
		emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
					   destreg, countreg));
	    }
	  else
	    emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
					     destreg, countreg));
	}
      /* Clear the remaining tail bytes with single stores; narrow views
	 of zeroreg are taken via SUBREGs.  */
      if (size == 8 && (count & 0x04))
	emit_insn (gen_strsetsi (destreg,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (count & 0x02)
	emit_insn (gen_strsethi (destreg,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (count & 0x01)
	emit_insn (gen_strsetqi (destreg,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
    }
  else
    {
      rtx countreg2;
      rtx label = NULL;
      /* Compute desired alignment of the string operation.  */
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.  */
      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
	return 0;

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);
      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);

      /* Skip the alignment prologue entirely when the (unknown) count is
	 small enough.  */
      if (count == 0 && align < desired_alignment)
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  emit_insn (gen_strsetqi (destreg,
				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  emit_insn (gen_strsethi (destreg,
				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
					     ? gen_rtx_SUBREG (SImode, zeroreg, 0)
					     : zeroreg)));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}

      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      /* Bulk clear: shift the byte count down to a word count and use
	 rep stos.  */
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
					   destreg, countreg2));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
	  emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
				     destreg, countreg2));
	}
      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      /* Clear the remaining sub-word tail, testing countreg at runtime
	 when the count is not a compile-time constant.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	emit_insn (gen_strsetsi (destreg,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (TARGET_64BIT && (align <= 4 || count == 0))
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  emit_insn (gen_strsetsi (destreg,
				   gen_rtx_SUBREG (SImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	emit_insn (gen_strsethi (destreg,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  emit_insn (gen_strsethi (destreg,
				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	emit_insn (gen_strsetqi (destreg,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  emit_insn (gen_strsetqi (destreg,
				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
  return 1;
}
/* Expand strlen.  OUT receives the length, SRC is the string MEM,
   EOSCHAR the terminator (const0_rtx for plain strlen) and ALIGN the
   known alignment.  Returns 1 when expanded inline, 0 when the caller
   should emit a library call.  */
int
ix86_expand_strlen (out, src, eoschar, align)
     rtx out, src, eoschar, align;
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid it's
     expanding unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_size
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
    return 0;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_size)
    {
      /* Well it seems that some optimizer does not combine a call like
	 foo(strlen(bar), strlen(bar));
	 when the move and the subtraction is done here.  It does calculate
	 the length just once when these instructions are done inside of
	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
	 often used and I use one fewer register for the lifetime of
	 output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
	 the string, like memchr(), so compute the length by subtracting
	 the start address.  */
      if (TARGET_64BIT)
	emit_insn (gen_subdi3 (out, out, addr));
      else
	emit_insn (gen_subsi3 (out, out, addr));
    }
  else
    {
      /* repnz scasb path: scratch4 = -1 is the maximal scan count; the
	 resulting count is recovered by complement and decrement.  */
      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
					 align, scratch4, scratch3));
	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
	}
      else
	{
	  emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
				     align, scratch4, scratch3));
	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
	}
    }
  return 1;
}
10528
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
	not aligned, otherwise undefined

   This is just the body.  It needs the initialisations mentioned above
   and some address computing at the end.  These things are done in
   i386.md.  On exit OUT holds the address one past the terminating zero
   byte (the caller subtracts the start address to get the length).  */

static void
ix86_expand_strlensi_unroll_1 (out, align_rtx)
     rtx out, align_rtx;
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  /* Dispatch on (address & 3): 0 -> already aligned, 2 -> two
	     bytes to check, otherwise (1 or 3) fall through / jump to
	     check up to three bytes.  */
	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
				   Pmode, 1, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
				   Pmode, 1, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if is aligned to 4 - byte.  */

	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	}

      mem = gen_rtx_MEM (QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, end_0_label);

      /* Increment the address.  */
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
				   end_0_label);

	  if (TARGET_64BIT)
	    emit_insn (gen_adddi3 (out, out, const1_rtx));
	  else
	    emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
			       end_0_label);

      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = gen_rtx_MEM (SImode, out);
  emit_move_insn (scratch, mem);
  if (TARGET_64BIT)
    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
  else
    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
			 gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
			   align_4_label);

  /* A zero byte was found; locate it within the word.  tmpreg has bit
     0x80 set in each byte position that held a zero.  */
  if (TARGET_CMOVE)
    {
       rtx reg = gen_reg_rtx (SImode);
       rtx reg2 = gen_reg_rtx (Pmode);
       emit_move_insn (reg, tmpreg);
       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

       /* If zero is not in the first two bytes, move two bytes forward.  */
       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
						     reg,
						     tmpreg)));
       /* Emit lea manually to avoid clobbering of flags.  */
       emit_insn (gen_rtx_SET (SImode, reg2,
			       gen_rtx_PLUS (Pmode, out, GEN_INT (2))));

       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
       emit_insn (gen_rtx_SET (VOIDmode, out,
			       gen_rtx_IF_THEN_ELSE (Pmode, tmp,
						     reg2,
						     out)));

    }
  else
    {
       rtx end_2_label = gen_label_rtx ();
       /* Is zero in the first two bytes? */

       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				   gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				   pc_rtx);
       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
       JUMP_LABEL (tmp) = end_2_label;

       /* Not in the first two.  Move two bytes forward.  */
       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
       if (TARGET_64BIT)
	 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
       else
	 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));

       emit_label (end_2_label);

    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  /* out was advanced past the word; subtract 3 minus the carry from
     the add above, yielding the address one past the zero byte.  */
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));

  emit_label (end_0_label);
}
/* Emit a call insn.  RETVAL, if nonzero, is where the call's value is
   stored; FNADDR is the (MEM of the) function address; CALLARG1 is the
   argument-bytes rtx for the CALL; CALLARG2 carries the SSE register
   count on 64-bit targets; POP, if nonzero, is the number of bytes the
   callee pops (not supported on 64-bit).  */

void
ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
     rtx retval, fnaddr, callarg1, callarg2, pop;
{
  rtx use = NULL, call;

  if (pop == const0_rtx)
    pop = NULL;
  /* 64-bit ABI never uses callee-pop.  */
  if (TARGET_64BIT && pop)
    abort ();

#if TARGET_MACHO
  if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
    fnaddr = machopic_indirect_call_target (fnaddr);
#else
  /* Static functions and indirect calls don't need the pic register.  */
  if (! TARGET_64BIT && flag_pic
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
    use_reg (&use, pic_offset_table_rtx);

  /* On 64-bit, %al carries the number of SSE registers used by a
     varargs call; record it as used.  */
  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, 0);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }
#endif /* TARGET_MACHO */

  /* Force an address the call pattern cannot handle into a register.  */
  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
    {
      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }

  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  /* Model callee-pop as a parallel stack-pointer adjustment.  */
  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
    }

  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;
}
fce5a9f2 10768
e075ae69 10769\f
e075ae69
RH
10770/* Clear stack slot assignments remembered from previous functions.
10771 This is called from INIT_EXPANDERS once before RTL is emitted for each
10772 function. */
10773
e2500fed
GK
10774static struct machine_function *
10775ix86_init_machine_status ()
37b15744 10776{
e2500fed 10777 return ggc_alloc_cleared (sizeof (struct machine_function));
1526a060
BS
10778}
10779
e075ae69
RH
10780/* Return a MEM corresponding to a stack slot with mode MODE.
10781 Allocate a new slot if necessary.
10782
10783 The RTL for a function can have several slots available: N is
10784 which slot to use. */
10785
10786rtx
10787assign_386_stack_local (mode, n)
10788 enum machine_mode mode;
10789 int n;
10790{
10791 if (n < 0 || n >= MAX_386_STACK_LOCALS)
10792 abort ();
10793
10794 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
10795 ix86_stack_locals[(int) mode][n]
10796 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10797
10798 return ix86_stack_locals[(int) mode][n];
10799}
f996902d
RH
10800
10801/* Construct the SYMBOL_REF for the tls_get_addr function. */
10802
e2500fed 10803static GTY(()) rtx ix86_tls_symbol;
f996902d
RH
10804rtx
10805ix86_tls_get_addr ()
10806{
f996902d 10807
e2500fed 10808 if (!ix86_tls_symbol)
f996902d 10809 {
e2500fed 10810 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, (TARGET_GNU_TLS
f996902d
RH
10811 ? "___tls_get_addr"
10812 : "__tls_get_addr"));
f996902d
RH
10813 }
10814
e2500fed 10815 return ix86_tls_symbol;
f996902d 10816}
e075ae69
RH
10817\f
10818/* Calculate the length of the memory address in the instruction
10819 encoding. Does not include the one-byte modrm, opcode, or prefix. */
10820
10821static int
10822memory_address_length (addr)
10823 rtx addr;
10824{
10825 struct ix86_address parts;
10826 rtx base, index, disp;
10827 int len;
10828
10829 if (GET_CODE (addr) == PRE_DEC
0d7d98ee
JH
10830 || GET_CODE (addr) == POST_INC
10831 || GET_CODE (addr) == PRE_MODIFY
10832 || GET_CODE (addr) == POST_MODIFY)
e075ae69 10833 return 0;
3f803cd9 10834
e075ae69
RH
10835 if (! ix86_decompose_address (addr, &parts))
10836 abort ();
3f803cd9 10837
e075ae69
RH
10838 base = parts.base;
10839 index = parts.index;
10840 disp = parts.disp;
10841 len = 0;
3f803cd9 10842
e075ae69
RH
10843 /* Register Indirect. */
10844 if (base && !index && !disp)
10845 {
10846 /* Special cases: ebp and esp need the two-byte modrm form. */
10847 if (addr == stack_pointer_rtx
10848 || addr == arg_pointer_rtx
564d80f4
JH
10849 || addr == frame_pointer_rtx
10850 || addr == hard_frame_pointer_rtx)
e075ae69 10851 len = 1;
3f803cd9 10852 }
e9a25f70 10853
e075ae69
RH
10854 /* Direct Addressing. */
10855 else if (disp && !base && !index)
10856 len = 4;
10857
3f803cd9
SC
10858 else
10859 {
e075ae69
RH
10860 /* Find the length of the displacement constant. */
10861 if (disp)
10862 {
10863 if (GET_CODE (disp) == CONST_INT
10864 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
10865 len = 1;
10866 else
10867 len = 4;
10868 }
3f803cd9 10869
e075ae69
RH
10870 /* An index requires the two-byte modrm form. */
10871 if (index)
10872 len += 1;
3f803cd9
SC
10873 }
10874
e075ae69
RH
10875 return len;
10876}
79325812 10877
5bf0ebab
RH
10878/* Compute default value for "length_immediate" attribute. When SHORTFORM
10879 is set, expect that insn have 8bit immediate alternative. */
e075ae69 10880int
6ef67412 10881ix86_attr_length_immediate_default (insn, shortform)
e075ae69 10882 rtx insn;
6ef67412 10883 int shortform;
e075ae69 10884{
6ef67412
JH
10885 int len = 0;
10886 int i;
6c698a6d 10887 extract_insn_cached (insn);
6ef67412
JH
10888 for (i = recog_data.n_operands - 1; i >= 0; --i)
10889 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 10890 {
6ef67412 10891 if (len)
3071fab5 10892 abort ();
6ef67412
JH
10893 if (shortform
10894 && GET_CODE (recog_data.operand[i]) == CONST_INT
10895 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
10896 len = 1;
10897 else
10898 {
10899 switch (get_attr_mode (insn))
10900 {
10901 case MODE_QI:
10902 len+=1;
10903 break;
10904 case MODE_HI:
10905 len+=2;
10906 break;
10907 case MODE_SI:
10908 len+=4;
10909 break;
14f73b5a
JH
10910 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
10911 case MODE_DI:
10912 len+=4;
10913 break;
6ef67412 10914 default:
c725bd79 10915 fatal_insn ("unknown insn mode", insn);
6ef67412
JH
10916 }
10917 }
3071fab5 10918 }
6ef67412
JH
10919 return len;
10920}
10921/* Compute default value for "length_address" attribute. */
10922int
10923ix86_attr_length_address_default (insn)
10924 rtx insn;
10925{
10926 int i;
6c698a6d 10927 extract_insn_cached (insn);
1ccbefce
RH
10928 for (i = recog_data.n_operands - 1; i >= 0; --i)
10929 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 10930 {
6ef67412 10931 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
10932 break;
10933 }
6ef67412 10934 return 0;
3f803cd9 10935}
e075ae69
RH
10936\f
10937/* Return the maximum number of instructions a cpu can issue. */
b657fc39 10938
c237e94a 10939static int
e075ae69 10940ix86_issue_rate ()
b657fc39 10941{
e075ae69 10942 switch (ix86_cpu)
b657fc39 10943 {
e075ae69
RH
10944 case PROCESSOR_PENTIUM:
10945 case PROCESSOR_K6:
10946 return 2;
79325812 10947
e075ae69 10948 case PROCESSOR_PENTIUMPRO:
b4e89e2d
JH
10949 case PROCESSOR_PENTIUM4:
10950 case PROCESSOR_ATHLON:
e075ae69 10951 return 3;
b657fc39 10952
b657fc39 10953 default:
e075ae69 10954 return 1;
b657fc39 10955 }
b657fc39
L
10956}
10957
e075ae69
RH
10958/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
10959 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 10960
e075ae69
RH
10961static int
10962ix86_flags_dependant (insn, dep_insn, insn_type)
10963 rtx insn, dep_insn;
10964 enum attr_type insn_type;
10965{
10966 rtx set, set2;
b657fc39 10967
e075ae69
RH
10968 /* Simplify the test for uninteresting insns. */
10969 if (insn_type != TYPE_SETCC
10970 && insn_type != TYPE_ICMOV
10971 && insn_type != TYPE_FCMOV
10972 && insn_type != TYPE_IBR)
10973 return 0;
b657fc39 10974
e075ae69
RH
10975 if ((set = single_set (dep_insn)) != 0)
10976 {
10977 set = SET_DEST (set);
10978 set2 = NULL_RTX;
10979 }
10980 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
10981 && XVECLEN (PATTERN (dep_insn), 0) == 2
10982 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
10983 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
10984 {
10985 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10986 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10987 }
78a0d70c
ZW
10988 else
10989 return 0;
b657fc39 10990
78a0d70c
ZW
10991 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
10992 return 0;
b657fc39 10993
f5143c46 10994 /* This test is true if the dependent insn reads the flags but
78a0d70c
ZW
10995 not any other potentially set register. */
10996 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
10997 return 0;
10998
10999 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11000 return 0;
11001
11002 return 1;
e075ae69 11003}
b657fc39 11004
e075ae69
RH
11005/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11006 address with operands set by DEP_INSN. */
11007
11008static int
11009ix86_agi_dependant (insn, dep_insn, insn_type)
11010 rtx insn, dep_insn;
11011 enum attr_type insn_type;
11012{
11013 rtx addr;
11014
6ad48e84
JH
11015 if (insn_type == TYPE_LEA
11016 && TARGET_PENTIUM)
5fbdde42
RH
11017 {
11018 addr = PATTERN (insn);
11019 if (GET_CODE (addr) == SET)
11020 ;
11021 else if (GET_CODE (addr) == PARALLEL
11022 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11023 addr = XVECEXP (addr, 0, 0);
11024 else
11025 abort ();
11026 addr = SET_SRC (addr);
11027 }
e075ae69
RH
11028 else
11029 {
11030 int i;
6c698a6d 11031 extract_insn_cached (insn);
1ccbefce
RH
11032 for (i = recog_data.n_operands - 1; i >= 0; --i)
11033 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 11034 {
1ccbefce 11035 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
11036 goto found;
11037 }
11038 return 0;
11039 found:;
b657fc39
L
11040 }
11041
e075ae69 11042 return modified_in_p (addr, dep_insn);
b657fc39 11043}
a269a03c 11044
c237e94a 11045static int
e075ae69 11046ix86_adjust_cost (insn, link, dep_insn, cost)
a269a03c
JC
11047 rtx insn, link, dep_insn;
11048 int cost;
11049{
e075ae69 11050 enum attr_type insn_type, dep_insn_type;
6ad48e84 11051 enum attr_memory memory, dep_memory;
e075ae69 11052 rtx set, set2;
9b00189f 11053 int dep_insn_code_number;
a269a03c 11054
309ada50 11055 /* Anti and output depenancies have zero cost on all CPUs. */
e075ae69 11056 if (REG_NOTE_KIND (link) != 0)
309ada50 11057 return 0;
a269a03c 11058
9b00189f
JH
11059 dep_insn_code_number = recog_memoized (dep_insn);
11060
e075ae69 11061 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 11062 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 11063 return cost;
a269a03c 11064
1c71e60e
JH
11065 insn_type = get_attr_type (insn);
11066 dep_insn_type = get_attr_type (dep_insn);
9b00189f 11067
a269a03c
JC
11068 switch (ix86_cpu)
11069 {
11070 case PROCESSOR_PENTIUM:
e075ae69
RH
11071 /* Address Generation Interlock adds a cycle of latency. */
11072 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11073 cost += 1;
11074
11075 /* ??? Compares pair with jump/setcc. */
11076 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11077 cost = 0;
11078
11079 /* Floating point stores require value to be ready one cycle ealier. */
0f290768 11080 if (insn_type == TYPE_FMOV
e075ae69
RH
11081 && get_attr_memory (insn) == MEMORY_STORE
11082 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11083 cost += 1;
11084 break;
a269a03c 11085
e075ae69 11086 case PROCESSOR_PENTIUMPRO:
6ad48e84
JH
11087 memory = get_attr_memory (insn);
11088 dep_memory = get_attr_memory (dep_insn);
11089
0f290768 11090 /* Since we can't represent delayed latencies of load+operation,
e075ae69
RH
11091 increase the cost here for non-imov insns. */
11092 if (dep_insn_type != TYPE_IMOV
6ad48e84
JH
11093 && dep_insn_type != TYPE_FMOV
11094 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
e075ae69
RH
11095 cost += 1;
11096
11097 /* INT->FP conversion is expensive. */
11098 if (get_attr_fp_int_src (dep_insn))
11099 cost += 5;
11100
11101 /* There is one cycle extra latency between an FP op and a store. */
11102 if (insn_type == TYPE_FMOV
11103 && (set = single_set (dep_insn)) != NULL_RTX
11104 && (set2 = single_set (insn)) != NULL_RTX
11105 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11106 && GET_CODE (SET_DEST (set2)) == MEM)
11107 cost += 1;
6ad48e84
JH
11108
11109 /* Show ability of reorder buffer to hide latency of load by executing
11110 in parallel with previous instruction in case
11111 previous instruction is not needed to compute the address. */
11112 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11113 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11114 {
11115 /* Claim moves to take one cycle, as core can issue one load
11116 at time and the next load can start cycle later. */
11117 if (dep_insn_type == TYPE_IMOV
11118 || dep_insn_type == TYPE_FMOV)
11119 cost = 1;
11120 else if (cost > 1)
11121 cost--;
11122 }
e075ae69 11123 break;
a269a03c 11124
e075ae69 11125 case PROCESSOR_K6:
6ad48e84
JH
11126 memory = get_attr_memory (insn);
11127 dep_memory = get_attr_memory (dep_insn);
e075ae69
RH
11128 /* The esp dependency is resolved before the instruction is really
11129 finished. */
11130 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11131 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11132 return 1;
a269a03c 11133
0f290768 11134 /* Since we can't represent delayed latencies of load+operation,
e075ae69 11135 increase the cost here for non-imov insns. */
6ad48e84 11136 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
e075ae69
RH
11137 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11138
11139 /* INT->FP conversion is expensive. */
11140 if (get_attr_fp_int_src (dep_insn))
11141 cost += 5;
6ad48e84
JH
11142
11143 /* Show ability of reorder buffer to hide latency of load by executing
11144 in parallel with previous instruction in case
11145 previous instruction is not needed to compute the address. */
11146 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11147 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11148 {
11149 /* Claim moves to take one cycle, as core can issue one load
11150 at time and the next load can start cycle later. */
11151 if (dep_insn_type == TYPE_IMOV
11152 || dep_insn_type == TYPE_FMOV)
11153 cost = 1;
11154 else if (cost > 2)
11155 cost -= 2;
11156 else
11157 cost = 1;
11158 }
a14003ee 11159 break;
e075ae69 11160
309ada50 11161 case PROCESSOR_ATHLON:
6ad48e84
JH
11162 memory = get_attr_memory (insn);
11163 dep_memory = get_attr_memory (dep_insn);
11164
11165 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
0b5107cf
JH
11166 {
11167 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
11168 cost += 2;
11169 else
11170 cost += 3;
11171 }
6ad48e84
JH
11172 /* Show ability of reorder buffer to hide latency of load by executing
11173 in parallel with previous instruction in case
11174 previous instruction is not needed to compute the address. */
11175 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11176 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11177 {
11178 /* Claim moves to take one cycle, as core can issue one load
11179 at time and the next load can start cycle later. */
11180 if (dep_insn_type == TYPE_IMOV
11181 || dep_insn_type == TYPE_FMOV)
11182 cost = 0;
11183 else if (cost >= 3)
11184 cost -= 3;
11185 else
11186 cost = 0;
11187 }
309ada50 11188
a269a03c 11189 default:
a269a03c
JC
11190 break;
11191 }
11192
11193 return cost;
11194}
0a726ef1 11195
e075ae69
RH
11196static union
11197{
11198 struct ppro_sched_data
11199 {
11200 rtx decode[3];
11201 int issued_this_cycle;
11202 } ppro;
11203} ix86_sched_data;
0a726ef1 11204
e075ae69
RH
11205static enum attr_ppro_uops
11206ix86_safe_ppro_uops (insn)
11207 rtx insn;
11208{
11209 if (recog_memoized (insn) >= 0)
11210 return get_attr_ppro_uops (insn);
11211 else
11212 return PPRO_UOPS_MANY;
11213}
0a726ef1 11214
e075ae69
RH
11215static void
11216ix86_dump_ppro_packet (dump)
11217 FILE *dump;
0a726ef1 11218{
e075ae69 11219 if (ix86_sched_data.ppro.decode[0])
0a726ef1 11220 {
e075ae69
RH
11221 fprintf (dump, "PPRO packet: %d",
11222 INSN_UID (ix86_sched_data.ppro.decode[0]));
11223 if (ix86_sched_data.ppro.decode[1])
11224 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11225 if (ix86_sched_data.ppro.decode[2])
11226 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11227 fputc ('\n', dump);
11228 }
11229}
0a726ef1 11230
e075ae69 11231/* We're beginning a new block. Initialize data structures as necessary. */
0a726ef1 11232
c237e94a
ZW
11233static void
11234ix86_sched_init (dump, sched_verbose, veclen)
e075ae69
RH
11235 FILE *dump ATTRIBUTE_UNUSED;
11236 int sched_verbose ATTRIBUTE_UNUSED;
c237e94a 11237 int veclen ATTRIBUTE_UNUSED;
e075ae69
RH
11238{
11239 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11240}
11241
11242/* Shift INSN to SLOT, and shift everything else down. */
11243
11244static void
11245ix86_reorder_insn (insnp, slot)
11246 rtx *insnp, *slot;
11247{
11248 if (insnp != slot)
11249 {
11250 rtx insn = *insnp;
0f290768 11251 do
e075ae69
RH
11252 insnp[0] = insnp[1];
11253 while (++insnp != slot);
11254 *insnp = insn;
0a726ef1 11255 }
e075ae69
RH
11256}
11257
c6991660 11258static void
78a0d70c
ZW
11259ix86_sched_reorder_ppro (ready, e_ready)
11260 rtx *ready;
11261 rtx *e_ready;
11262{
11263 rtx decode[3];
11264 enum attr_ppro_uops cur_uops;
11265 int issued_this_cycle;
11266 rtx *insnp;
11267 int i;
e075ae69 11268
0f290768 11269 /* At this point .ppro.decode contains the state of the three
78a0d70c 11270 decoders from last "cycle". That is, those insns that were
0f290768 11271 actually independent. But here we're scheduling for the
78a0d70c
ZW
11272 decoder, and we may find things that are decodable in the
11273 same cycle. */
e075ae69 11274
0f290768 11275 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
78a0d70c 11276 issued_this_cycle = 0;
e075ae69 11277
78a0d70c
ZW
11278 insnp = e_ready;
11279 cur_uops = ix86_safe_ppro_uops (*insnp);
0a726ef1 11280
78a0d70c
ZW
11281 /* If the decoders are empty, and we've a complex insn at the
11282 head of the priority queue, let it issue without complaint. */
11283 if (decode[0] == NULL)
11284 {
11285 if (cur_uops == PPRO_UOPS_MANY)
11286 {
11287 decode[0] = *insnp;
11288 goto ppro_done;
11289 }
11290
11291 /* Otherwise, search for a 2-4 uop unsn to issue. */
11292 while (cur_uops != PPRO_UOPS_FEW)
11293 {
11294 if (insnp == ready)
11295 break;
11296 cur_uops = ix86_safe_ppro_uops (*--insnp);
11297 }
11298
11299 /* If so, move it to the head of the line. */
11300 if (cur_uops == PPRO_UOPS_FEW)
11301 ix86_reorder_insn (insnp, e_ready);
0a726ef1 11302
78a0d70c
ZW
11303 /* Issue the head of the queue. */
11304 issued_this_cycle = 1;
11305 decode[0] = *e_ready--;
11306 }
fb693d44 11307
78a0d70c
ZW
11308 /* Look for simple insns to fill in the other two slots. */
11309 for (i = 1; i < 3; ++i)
11310 if (decode[i] == NULL)
11311 {
a151daf0 11312 if (ready > e_ready)
78a0d70c 11313 goto ppro_done;
fb693d44 11314
e075ae69
RH
11315 insnp = e_ready;
11316 cur_uops = ix86_safe_ppro_uops (*insnp);
78a0d70c
ZW
11317 while (cur_uops != PPRO_UOPS_ONE)
11318 {
11319 if (insnp == ready)
11320 break;
11321 cur_uops = ix86_safe_ppro_uops (*--insnp);
11322 }
fb693d44 11323
78a0d70c
ZW
11324 /* Found one. Move it to the head of the queue and issue it. */
11325 if (cur_uops == PPRO_UOPS_ONE)
e075ae69 11326 {
78a0d70c
ZW
11327 ix86_reorder_insn (insnp, e_ready);
11328 decode[i] = *e_ready--;
11329 issued_this_cycle++;
11330 continue;
11331 }
fb693d44 11332
78a0d70c
ZW
11333 /* ??? Didn't find one. Ideally, here we would do a lazy split
11334 of 2-uop insns, issue one and queue the other. */
11335 }
fb693d44 11336
78a0d70c
ZW
11337 ppro_done:
11338 if (issued_this_cycle == 0)
11339 issued_this_cycle = 1;
11340 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11341}
fb693d44 11342
0f290768 11343/* We are about to being issuing insns for this clock cycle.
78a0d70c 11344 Override the default sort algorithm to better slot instructions. */
c237e94a
ZW
11345static int
11346ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
78a0d70c
ZW
11347 FILE *dump ATTRIBUTE_UNUSED;
11348 int sched_verbose ATTRIBUTE_UNUSED;
11349 rtx *ready;
c237e94a 11350 int *n_readyp;
78a0d70c
ZW
11351 int clock_var ATTRIBUTE_UNUSED;
11352{
c237e94a 11353 int n_ready = *n_readyp;
78a0d70c 11354 rtx *e_ready = ready + n_ready - 1;
fb693d44 11355
fce5a9f2 11356 /* Make sure to go ahead and initialize key items in
a151daf0
JL
11357 ix86_sched_data if we are not going to bother trying to
11358 reorder the ready queue. */
78a0d70c 11359 if (n_ready < 2)
a151daf0
JL
11360 {
11361 ix86_sched_data.ppro.issued_this_cycle = 1;
11362 goto out;
11363 }
e075ae69 11364
78a0d70c
ZW
11365 switch (ix86_cpu)
11366 {
11367 default:
11368 break;
e075ae69 11369
78a0d70c
ZW
11370 case PROCESSOR_PENTIUMPRO:
11371 ix86_sched_reorder_ppro (ready, e_ready);
e075ae69 11372 break;
fb693d44
RH
11373 }
11374
e075ae69
RH
11375out:
11376 return ix86_issue_rate ();
11377}
fb693d44 11378
e075ae69
RH
11379/* We are about to issue INSN. Return the number of insns left on the
11380 ready queue that can be issued this cycle. */
b222082e 11381
c237e94a 11382static int
e075ae69
RH
11383ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11384 FILE *dump;
11385 int sched_verbose;
11386 rtx insn;
11387 int can_issue_more;
11388{
11389 int i;
11390 switch (ix86_cpu)
fb693d44 11391 {
e075ae69
RH
11392 default:
11393 return can_issue_more - 1;
fb693d44 11394
e075ae69
RH
11395 case PROCESSOR_PENTIUMPRO:
11396 {
11397 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
fb693d44 11398
e075ae69
RH
11399 if (uops == PPRO_UOPS_MANY)
11400 {
11401 if (sched_verbose)
11402 ix86_dump_ppro_packet (dump);
11403 ix86_sched_data.ppro.decode[0] = insn;
11404 ix86_sched_data.ppro.decode[1] = NULL;
11405 ix86_sched_data.ppro.decode[2] = NULL;
11406 if (sched_verbose)
11407 ix86_dump_ppro_packet (dump);
11408 ix86_sched_data.ppro.decode[0] = NULL;
11409 }
11410 else if (uops == PPRO_UOPS_FEW)
11411 {
11412 if (sched_verbose)
11413 ix86_dump_ppro_packet (dump);
11414 ix86_sched_data.ppro.decode[0] = insn;
11415 ix86_sched_data.ppro.decode[1] = NULL;
11416 ix86_sched_data.ppro.decode[2] = NULL;
11417 }
11418 else
11419 {
11420 for (i = 0; i < 3; ++i)
11421 if (ix86_sched_data.ppro.decode[i] == NULL)
11422 {
11423 ix86_sched_data.ppro.decode[i] = insn;
11424 break;
11425 }
11426 if (i == 3)
11427 abort ();
11428 if (i == 2)
11429 {
11430 if (sched_verbose)
11431 ix86_dump_ppro_packet (dump);
11432 ix86_sched_data.ppro.decode[0] = NULL;
11433 ix86_sched_data.ppro.decode[1] = NULL;
11434 ix86_sched_data.ppro.decode[2] = NULL;
11435 }
11436 }
11437 }
11438 return --ix86_sched_data.ppro.issued_this_cycle;
11439 }
fb693d44 11440}
9b690711
RH
11441
11442static int
11443ia32_use_dfa_pipeline_interface ()
11444{
11445 if (ix86_cpu == PROCESSOR_PENTIUM)
11446 return 1;
11447 return 0;
11448}
11449
11450/* How many alternative schedules to try. This should be as wide as the
11451 scheduling freedom in the DFA, but no wider. Making this value too
11452 large results extra work for the scheduler. */
11453
11454static int
11455ia32_multipass_dfa_lookahead ()
11456{
11457 if (ix86_cpu == PROCESSOR_PENTIUM)
11458 return 2;
11459 else
11460 return 0;
11461}
11462
a7180f70 11463\f
0e4970d7
RK
11464/* Walk through INSNS and look for MEM references whose address is DSTREG or
11465 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
11466 appropriate. */
11467
11468void
11469ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11470 rtx insns;
11471 rtx dstref, srcref, dstreg, srcreg;
11472{
11473 rtx insn;
11474
11475 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11476 if (INSN_P (insn))
11477 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11478 dstreg, srcreg);
11479}
11480
11481/* Subroutine of above to actually do the updating by recursively walking
11482 the rtx. */
11483
11484static void
11485ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11486 rtx x;
11487 rtx dstref, srcref, dstreg, srcreg;
11488{
11489 enum rtx_code code = GET_CODE (x);
11490 const char *format_ptr = GET_RTX_FORMAT (code);
11491 int i, j;
11492
11493 if (code == MEM && XEXP (x, 0) == dstreg)
11494 MEM_COPY_ATTRIBUTES (x, dstref);
11495 else if (code == MEM && XEXP (x, 0) == srcreg)
11496 MEM_COPY_ATTRIBUTES (x, srcref);
11497
11498 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11499 {
11500 if (*format_ptr == 'e')
11501 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11502 dstreg, srcreg);
11503 else if (*format_ptr == 'E')
11504 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
397be6cf 11505 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
0e4970d7
RK
11506 dstreg, srcreg);
11507 }
11508}
11509\f
a7180f70
BS
11510/* Compute the alignment given to a constant that is being placed in memory.
11511 EXP is the constant and ALIGN is the alignment that the object would
11512 ordinarily have.
11513 The value of this function is used instead of that alignment to align
11514 the object. */
11515
11516int
11517ix86_constant_alignment (exp, align)
11518 tree exp;
11519 int align;
11520{
11521 if (TREE_CODE (exp) == REAL_CST)
11522 {
11523 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11524 return 64;
11525 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11526 return 128;
11527 }
11528 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11529 && align < 256)
11530 return 256;
11531
11532 return align;
11533}
11534
11535/* Compute the alignment for a static variable.
11536 TYPE is the data type, and ALIGN is the alignment that
11537 the object would ordinarily have. The value of this function is used
11538 instead of that alignment to align the object. */
11539
11540int
11541ix86_data_alignment (type, align)
11542 tree type;
11543 int align;
11544{
11545 if (AGGREGATE_TYPE_P (type)
11546 && TYPE_SIZE (type)
11547 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11548 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11549 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11550 return 256;
11551
0d7d98ee
JH
11552 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11553 to 16byte boundary. */
11554 if (TARGET_64BIT)
11555 {
11556 if (AGGREGATE_TYPE_P (type)
11557 && TYPE_SIZE (type)
11558 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11559 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11560 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11561 return 128;
11562 }
11563
a7180f70
BS
11564 if (TREE_CODE (type) == ARRAY_TYPE)
11565 {
11566 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11567 return 64;
11568 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11569 return 128;
11570 }
11571 else if (TREE_CODE (type) == COMPLEX_TYPE)
11572 {
0f290768 11573
a7180f70
BS
11574 if (TYPE_MODE (type) == DCmode && align < 64)
11575 return 64;
11576 if (TYPE_MODE (type) == XCmode && align < 128)
11577 return 128;
11578 }
11579 else if ((TREE_CODE (type) == RECORD_TYPE
11580 || TREE_CODE (type) == UNION_TYPE
11581 || TREE_CODE (type) == QUAL_UNION_TYPE)
11582 && TYPE_FIELDS (type))
11583 {
11584 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11585 return 64;
11586 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11587 return 128;
11588 }
11589 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11590 || TREE_CODE (type) == INTEGER_TYPE)
11591 {
11592 if (TYPE_MODE (type) == DFmode && align < 64)
11593 return 64;
11594 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11595 return 128;
11596 }
11597
11598 return align;
11599}
11600
11601/* Compute the alignment for a local variable.
11602 TYPE is the data type, and ALIGN is the alignment that
11603 the object would ordinarily have. The value of this macro is used
11604 instead of that alignment to align the object. */
11605
11606int
11607ix86_local_alignment (type, align)
11608 tree type;
11609 int align;
11610{
0d7d98ee
JH
11611 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11612 to 16byte boundary. */
11613 if (TARGET_64BIT)
11614 {
11615 if (AGGREGATE_TYPE_P (type)
11616 && TYPE_SIZE (type)
11617 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11618 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11619 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11620 return 128;
11621 }
a7180f70
BS
11622 if (TREE_CODE (type) == ARRAY_TYPE)
11623 {
11624 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11625 return 64;
11626 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11627 return 128;
11628 }
11629 else if (TREE_CODE (type) == COMPLEX_TYPE)
11630 {
11631 if (TYPE_MODE (type) == DCmode && align < 64)
11632 return 64;
11633 if (TYPE_MODE (type) == XCmode && align < 128)
11634 return 128;
11635 }
11636 else if ((TREE_CODE (type) == RECORD_TYPE
11637 || TREE_CODE (type) == UNION_TYPE
11638 || TREE_CODE (type) == QUAL_UNION_TYPE)
11639 && TYPE_FIELDS (type))
11640 {
11641 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11642 return 64;
11643 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11644 return 128;
11645 }
11646 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11647 || TREE_CODE (type) == INTEGER_TYPE)
11648 {
0f290768 11649
a7180f70
BS
11650 if (TYPE_MODE (type) == DFmode && align < 64)
11651 return 64;
11652 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11653 return 128;
11654 }
11655 return align;
11656}
0ed08620
JH
11657\f
11658/* Emit RTL insns to initialize the variable parts of a trampoline.
11659 FNADDR is an RTX for the address of the function's pure code.
11660 CXT is an RTX for the static chain value for the function. */
11661void
11662x86_initialize_trampoline (tramp, fnaddr, cxt)
11663 rtx tramp, fnaddr, cxt;
11664{
11665 if (!TARGET_64BIT)
11666 {
11667 /* Compute offset from the end of the jmp to the target function. */
11668 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11669 plus_constant (tramp, 10),
11670 NULL_RTX, 1, OPTAB_DIRECT);
11671 emit_move_insn (gen_rtx_MEM (QImode, tramp),
d8bf17f9 11672 gen_int_mode (0xb9, QImode));
0ed08620
JH
11673 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11674 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
d8bf17f9 11675 gen_int_mode (0xe9, QImode));
0ed08620
JH
11676 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11677 }
11678 else
11679 {
11680 int offset = 0;
11681 /* Try to load address using shorter movl instead of movabs.
11682 We may want to support movq for kernel mode, but kernel does not use
11683 trampolines at the moment. */
11684 if (x86_64_zero_extended_value (fnaddr))
11685 {
11686 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11687 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 11688 gen_int_mode (0xbb41, HImode));
0ed08620
JH
11689 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11690 gen_lowpart (SImode, fnaddr));
11691 offset += 6;
11692 }
11693 else
11694 {
11695 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 11696 gen_int_mode (0xbb49, HImode));
0ed08620
JH
11697 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11698 fnaddr);
11699 offset += 10;
11700 }
11701 /* Load static chain using movabs to r10. */
11702 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 11703 gen_int_mode (0xba49, HImode));
0ed08620
JH
11704 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11705 cxt);
11706 offset += 10;
11707 /* Jump to the r11 */
11708 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 11709 gen_int_mode (0xff49, HImode));
0ed08620 11710 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
d8bf17f9 11711 gen_int_mode (0xe3, QImode));
0ed08620
JH
11712 offset += 3;
11713 if (offset > TRAMPOLINE_SIZE)
b531087a 11714 abort ();
0ed08620
JH
11715 }
11716}
eeb06b1b 11717\f
6a2dd09a
RS
11718#define def_builtin(MASK, NAME, TYPE, CODE) \
11719do { \
11720 if ((MASK) & target_flags) \
11721 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11722 NULL, NULL_TREE); \
eeb06b1b 11723} while (0)
bd793c65 11724
bd793c65
BS
11725struct builtin_description
11726{
8b60264b
KG
11727 const unsigned int mask;
11728 const enum insn_code icode;
11729 const char *const name;
11730 const enum ix86_builtins code;
11731 const enum rtx_code comparison;
11732 const unsigned int flag;
bd793c65
BS
11733};
11734
fbe5eb6d
BS
11735/* Used for builtins that are enabled both by -msse and -msse2. */
11736#define MASK_SSE1 (MASK_SSE | MASK_SSE2)
11737
8b60264b 11738static const struct builtin_description bdesc_comi[] =
bd793c65 11739{
fbe5eb6d
BS
11740 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
11741 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
11742 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
11743 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
11744 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
11745 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
11746 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
11747 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
11748 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
11749 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
11750 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
11751 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
11752 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
11753 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
11754 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
11755 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
11756 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
11757 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
11758 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
11759 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
11760 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
11761 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
11762 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
11763 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
bd793c65
BS
11764};
11765
8b60264b 11766static const struct builtin_description bdesc_2arg[] =
bd793c65
BS
11767{
11768 /* SSE */
fbe5eb6d
BS
11769 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11770 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11771 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11772 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11773 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11774 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11775 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11776 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11777
11778 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11779 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11780 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11781 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11782 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11783 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11784 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11785 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11786 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11787 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11788 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11789 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11790 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11791 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11792 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
fbe5eb6d
BS
11793 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11794 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11795 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11796 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
fbe5eb6d
BS
11797 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11798
11799 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11800 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11801 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11802 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11803
1877be45
JH
11804 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
11805 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
11806 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
11807 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
11808
fbe5eb6d
BS
11809 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11810 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11811 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11812 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11813 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
bd793c65
BS
11814
11815 /* MMX */
eeb06b1b
BS
11816 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11817 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11818 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11819 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11820 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11821 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11822
11823 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11824 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11825 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11826 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11827 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11828 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11829 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11830 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11831
11832 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11833 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
fbe5eb6d 11834 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
eeb06b1b
BS
11835
11836 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11837 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11838 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11839 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11840
fbe5eb6d
BS
11841 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11842 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
eeb06b1b
BS
11843
11844 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11845 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11846 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11847 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11848 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11849 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11850
fbe5eb6d
BS
11851 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11852 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11853 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11854 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
eeb06b1b
BS
11855
11856 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11857 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11858 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11859 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11860 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11861 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
bd793c65
BS
11862
11863 /* Special. */
eeb06b1b
BS
11864 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11865 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11866 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11867
fbe5eb6d
BS
11868 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11869 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
eeb06b1b
BS
11870
11871 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11872 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11873 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11874 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11875 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11876 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
11877
11878 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
11879 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
11880 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
11881 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
11882 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
11883 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
11884
11885 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
11886 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
11887 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
11888 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
11889
fbe5eb6d
BS
11890 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
11891 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
11892
11893 /* SSE2 */
11894 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
11895 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
11896 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
11897 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
11898 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
11899 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
11900 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
11901 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
11902
11903 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
11904 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
11905 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
11906 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
11907 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
11908 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
11909 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
11910 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
11911 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
11912 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
11913 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
11914 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
11915 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
11916 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
11917 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
fbe5eb6d
BS
11918 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
11919 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
11920 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
11921 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
fbe5eb6d
BS
11922 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
11923
11924 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
11925 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
11926 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
11927 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
11928
1877be45
JH
11929 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
11930 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
11931 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
11932 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
fbe5eb6d
BS
11933
11934 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
11935 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
11936 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
11937
11938 /* SSE2 MMX */
11939 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
11940 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
11941 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
11942 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
11943 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
11944 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
11945 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
11946 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
11947
11948 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
11949 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
11950 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
11951 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
11952 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
11953 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
11954 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
11955 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
11956
11957 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
11958 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
11959 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
11960 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
11961
916b60b7
BS
11962 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
11963 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
11964 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
11965 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
fbe5eb6d
BS
11966
11967 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
11968 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
11969
11970 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
11971 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
11972 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
11973 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
11974 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
11975 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
11976
11977 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
11978 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
11979 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
11980 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
11981
11982 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
11983 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
11984 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
11985 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
11986 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
11987 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
11988
916b60b7
BS
11989 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
11990 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
11991 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
11992
11993 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
11994 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
11995
11996 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
11997 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
11998 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
11999 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12000 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12001 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12002
12003 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12004 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12005 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12006 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12007 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12008 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12009
12010 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12011 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12012 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12013 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12014
12015 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12016
fbe5eb6d
BS
12017 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12018 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12019 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
bd793c65
BS
12020};
12021
8b60264b 12022static const struct builtin_description bdesc_1arg[] =
bd793c65 12023{
fbe5eb6d
BS
12024 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12025 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12026
12027 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12028 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12029 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12030
12031 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12032 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12033 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12034 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12035
12036 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12037 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12038 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12039
12040 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12041
12042 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12043 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
bd793c65 12044
fbe5eb6d
BS
12045 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12046 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12047 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12048 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12049 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
bd793c65 12050
fbe5eb6d 12051 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
bd793c65 12052
fbe5eb6d
BS
12053 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12054 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12055
12056 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12057 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12058 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
bd793c65
BS
12059};
12060
f6155fda
SS
12061void
12062ix86_init_builtins ()
12063{
12064 if (TARGET_MMX)
12065 ix86_init_mmx_sse_builtins ();
12066}
12067
12068/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
bd793c65
BS
12069 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12070 builtins. */
e37af218 12071static void
f6155fda 12072ix86_init_mmx_sse_builtins ()
bd793c65 12073{
8b60264b 12074 const struct builtin_description * d;
77ebd435 12075 size_t i;
bd793c65
BS
12076
12077 tree pchar_type_node = build_pointer_type (char_type_node);
12078 tree pfloat_type_node = build_pointer_type (float_type_node);
12079 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
916b60b7 12080 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
bd793c65
BS
12081 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12082
12083 /* Comparisons. */
12084 tree int_ftype_v4sf_v4sf
b4de2f7d
AH
12085 = build_function_type_list (integer_type_node,
12086 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12087 tree v4si_ftype_v4sf_v4sf
b4de2f7d
AH
12088 = build_function_type_list (V4SI_type_node,
12089 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12090 /* MMX/SSE/integer conversions. */
bd793c65 12091 tree int_ftype_v4sf
b4de2f7d
AH
12092 = build_function_type_list (integer_type_node,
12093 V4SF_type_node, NULL_TREE);
bd793c65 12094 tree int_ftype_v8qi
b4de2f7d 12095 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
bd793c65 12096 tree v4sf_ftype_v4sf_int
b4de2f7d
AH
12097 = build_function_type_list (V4SF_type_node,
12098 V4SF_type_node, integer_type_node, NULL_TREE);
bd793c65 12099 tree v4sf_ftype_v4sf_v2si
b4de2f7d
AH
12100 = build_function_type_list (V4SF_type_node,
12101 V4SF_type_node, V2SI_type_node, NULL_TREE);
bd793c65 12102 tree int_ftype_v4hi_int
b4de2f7d
AH
12103 = build_function_type_list (integer_type_node,
12104 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 12105 tree v4hi_ftype_v4hi_int_int
e7a60f56 12106 = build_function_type_list (V4HI_type_node, V4HI_type_node,
b4de2f7d
AH
12107 integer_type_node, integer_type_node,
12108 NULL_TREE);
bd793c65
BS
12109 /* Miscellaneous. */
12110 tree v8qi_ftype_v4hi_v4hi
b4de2f7d
AH
12111 = build_function_type_list (V8QI_type_node,
12112 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 12113 tree v4hi_ftype_v2si_v2si
b4de2f7d
AH
12114 = build_function_type_list (V4HI_type_node,
12115 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 12116 tree v4sf_ftype_v4sf_v4sf_int
b4de2f7d
AH
12117 = build_function_type_list (V4SF_type_node,
12118 V4SF_type_node, V4SF_type_node,
12119 integer_type_node, NULL_TREE);
bd793c65 12120 tree v2si_ftype_v4hi_v4hi
b4de2f7d
AH
12121 = build_function_type_list (V2SI_type_node,
12122 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 12123 tree v4hi_ftype_v4hi_int
b4de2f7d 12124 = build_function_type_list (V4HI_type_node,
e7a60f56 12125 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 12126 tree v4hi_ftype_v4hi_di
b4de2f7d
AH
12127 = build_function_type_list (V4HI_type_node,
12128 V4HI_type_node, long_long_unsigned_type_node,
12129 NULL_TREE);
bd793c65 12130 tree v2si_ftype_v2si_di
b4de2f7d
AH
12131 = build_function_type_list (V2SI_type_node,
12132 V2SI_type_node, long_long_unsigned_type_node,
12133 NULL_TREE);
bd793c65 12134 tree void_ftype_void
b4de2f7d 12135 = build_function_type (void_type_node, void_list_node);
bd793c65 12136 tree void_ftype_unsigned
b4de2f7d 12137 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
bd793c65 12138 tree unsigned_ftype_void
b4de2f7d 12139 = build_function_type (unsigned_type_node, void_list_node);
bd793c65 12140 tree di_ftype_void
b4de2f7d 12141 = build_function_type (long_long_unsigned_type_node, void_list_node);
e37af218 12142 tree v4sf_ftype_void
b4de2f7d 12143 = build_function_type (V4SF_type_node, void_list_node);
bd793c65 12144 tree v2si_ftype_v4sf
b4de2f7d 12145 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12146 /* Loads/stores. */
bd793c65 12147 tree void_ftype_v8qi_v8qi_pchar
b4de2f7d
AH
12148 = build_function_type_list (void_type_node,
12149 V8QI_type_node, V8QI_type_node,
12150 pchar_type_node, NULL_TREE);
bd793c65 12151 tree v4sf_ftype_pfloat
b4de2f7d 12152 = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
bd793c65
BS
12153 /* @@@ the type is bogus */
12154 tree v4sf_ftype_v4sf_pv2si
b4de2f7d
AH
12155 = build_function_type_list (V4SF_type_node,
12156 V4SF_type_node, pv2di_type_node, NULL_TREE);
1255c85c 12157 tree void_ftype_pv2si_v4sf
b4de2f7d
AH
12158 = build_function_type_list (void_type_node,
12159 pv2di_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12160 tree void_ftype_pfloat_v4sf
b4de2f7d
AH
12161 = build_function_type_list (void_type_node,
12162 pfloat_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12163 tree void_ftype_pdi_di
b4de2f7d
AH
12164 = build_function_type_list (void_type_node,
12165 pdi_type_node, long_long_unsigned_type_node,
12166 NULL_TREE);
916b60b7 12167 tree void_ftype_pv2di_v2di
b4de2f7d
AH
12168 = build_function_type_list (void_type_node,
12169 pv2di_type_node, V2DI_type_node, NULL_TREE);
bd793c65
BS
12170 /* Normal vector unops. */
12171 tree v4sf_ftype_v4sf
b4de2f7d 12172 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
0f290768 12173
bd793c65
BS
12174 /* Normal vector binops. */
12175 tree v4sf_ftype_v4sf_v4sf
b4de2f7d
AH
12176 = build_function_type_list (V4SF_type_node,
12177 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12178 tree v8qi_ftype_v8qi_v8qi
b4de2f7d
AH
12179 = build_function_type_list (V8QI_type_node,
12180 V8QI_type_node, V8QI_type_node, NULL_TREE);
bd793c65 12181 tree v4hi_ftype_v4hi_v4hi
b4de2f7d
AH
12182 = build_function_type_list (V4HI_type_node,
12183 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 12184 tree v2si_ftype_v2si_v2si
b4de2f7d
AH
12185 = build_function_type_list (V2SI_type_node,
12186 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 12187 tree di_ftype_di_di
b4de2f7d
AH
12188 = build_function_type_list (long_long_unsigned_type_node,
12189 long_long_unsigned_type_node,
12190 long_long_unsigned_type_node, NULL_TREE);
bd793c65 12191
47f339cf 12192 tree v2si_ftype_v2sf
ae3aa00d 12193 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
47f339cf 12194 tree v2sf_ftype_v2si
b4de2f7d 12195 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
47f339cf 12196 tree v2si_ftype_v2si
b4de2f7d 12197 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
47f339cf 12198 tree v2sf_ftype_v2sf
b4de2f7d 12199 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 12200 tree v2sf_ftype_v2sf_v2sf
b4de2f7d
AH
12201 = build_function_type_list (V2SF_type_node,
12202 V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 12203 tree v2si_ftype_v2sf_v2sf
b4de2f7d
AH
12204 = build_function_type_list (V2SI_type_node,
12205 V2SF_type_node, V2SF_type_node, NULL_TREE);
fbe5eb6d
BS
12206 tree pint_type_node = build_pointer_type (integer_type_node);
12207 tree pdouble_type_node = build_pointer_type (double_type_node);
12208 tree int_ftype_v2df_v2df
b4de2f7d
AH
12209 = build_function_type_list (integer_type_node,
12210 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d
BS
12211
12212 tree ti_ftype_void
b4de2f7d 12213 = build_function_type (intTI_type_node, void_list_node);
fbe5eb6d 12214 tree ti_ftype_ti_ti
b4de2f7d
AH
12215 = build_function_type_list (intTI_type_node,
12216 intTI_type_node, intTI_type_node, NULL_TREE);
fbe5eb6d 12217 tree void_ftype_pvoid
b4de2f7d 12218 = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
fbe5eb6d 12219 tree v2di_ftype_di
b4de2f7d
AH
12220 = build_function_type_list (V2DI_type_node,
12221 long_long_unsigned_type_node, NULL_TREE);
fbe5eb6d 12222 tree v4sf_ftype_v4si
b4de2f7d 12223 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 12224 tree v4si_ftype_v4sf
b4de2f7d 12225 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 12226 tree v2df_ftype_v4si
b4de2f7d 12227 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 12228 tree v4si_ftype_v2df
b4de2f7d 12229 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12230 tree v2si_ftype_v2df
b4de2f7d 12231 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12232 tree v4sf_ftype_v2df
b4de2f7d 12233 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12234 tree v2df_ftype_v2si
b4de2f7d 12235 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
fbe5eb6d 12236 tree v2df_ftype_v4sf
b4de2f7d 12237 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 12238 tree int_ftype_v2df
b4de2f7d 12239 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12240 tree v2df_ftype_v2df_int
b4de2f7d
AH
12241 = build_function_type_list (V2DF_type_node,
12242 V2DF_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12243 tree v4sf_ftype_v4sf_v2df
b4de2f7d
AH
12244 = build_function_type_list (V4SF_type_node,
12245 V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12246 tree v2df_ftype_v2df_v4sf
b4de2f7d
AH
12247 = build_function_type_list (V2DF_type_node,
12248 V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 12249 tree v2df_ftype_v2df_v2df_int
b4de2f7d
AH
12250 = build_function_type_list (V2DF_type_node,
12251 V2DF_type_node, V2DF_type_node,
12252 integer_type_node,
12253 NULL_TREE);
fbe5eb6d 12254 tree v2df_ftype_v2df_pv2si
b4de2f7d
AH
12255 = build_function_type_list (V2DF_type_node,
12256 V2DF_type_node, pv2si_type_node, NULL_TREE);
fbe5eb6d 12257 tree void_ftype_pv2si_v2df
b4de2f7d
AH
12258 = build_function_type_list (void_type_node,
12259 pv2si_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12260 tree void_ftype_pdouble_v2df
b4de2f7d
AH
12261 = build_function_type_list (void_type_node,
12262 pdouble_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12263 tree void_ftype_pint_int
b4de2f7d
AH
12264 = build_function_type_list (void_type_node,
12265 pint_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12266 tree void_ftype_v16qi_v16qi_pchar
b4de2f7d
AH
12267 = build_function_type_list (void_type_node,
12268 V16QI_type_node, V16QI_type_node,
12269 pchar_type_node, NULL_TREE);
fbe5eb6d 12270 tree v2df_ftype_pdouble
b4de2f7d 12271 = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
fbe5eb6d 12272 tree v2df_ftype_v2df_v2df
b4de2f7d
AH
12273 = build_function_type_list (V2DF_type_node,
12274 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12275 tree v16qi_ftype_v16qi_v16qi
b4de2f7d
AH
12276 = build_function_type_list (V16QI_type_node,
12277 V16QI_type_node, V16QI_type_node, NULL_TREE);
fbe5eb6d 12278 tree v8hi_ftype_v8hi_v8hi
b4de2f7d
AH
12279 = build_function_type_list (V8HI_type_node,
12280 V8HI_type_node, V8HI_type_node, NULL_TREE);
fbe5eb6d 12281 tree v4si_ftype_v4si_v4si
b4de2f7d
AH
12282 = build_function_type_list (V4SI_type_node,
12283 V4SI_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 12284 tree v2di_ftype_v2di_v2di
b4de2f7d
AH
12285 = build_function_type_list (V2DI_type_node,
12286 V2DI_type_node, V2DI_type_node, NULL_TREE);
fbe5eb6d 12287 tree v2di_ftype_v2df_v2df
b4de2f7d
AH
12288 = build_function_type_list (V2DI_type_node,
12289 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12290 tree v2df_ftype_v2df
b4de2f7d 12291 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12292 tree v2df_ftype_double
b4de2f7d 12293 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
fbe5eb6d 12294 tree v2df_ftype_double_double
b4de2f7d
AH
12295 = build_function_type_list (V2DF_type_node,
12296 double_type_node, double_type_node, NULL_TREE);
fbe5eb6d 12297 tree int_ftype_v8hi_int
b4de2f7d
AH
12298 = build_function_type_list (integer_type_node,
12299 V8HI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12300 tree v8hi_ftype_v8hi_int_int
b4de2f7d
AH
12301 = build_function_type_list (V8HI_type_node,
12302 V8HI_type_node, integer_type_node,
12303 integer_type_node, NULL_TREE);
916b60b7 12304 tree v2di_ftype_v2di_int
b4de2f7d
AH
12305 = build_function_type_list (V2DI_type_node,
12306 V2DI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12307 tree v4si_ftype_v4si_int
b4de2f7d
AH
12308 = build_function_type_list (V4SI_type_node,
12309 V4SI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12310 tree v8hi_ftype_v8hi_int
b4de2f7d
AH
12311 = build_function_type_list (V8HI_type_node,
12312 V8HI_type_node, integer_type_node, NULL_TREE);
916b60b7 12313 tree v8hi_ftype_v8hi_v2di
b4de2f7d
AH
12314 = build_function_type_list (V8HI_type_node,
12315 V8HI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 12316 tree v4si_ftype_v4si_v2di
b4de2f7d
AH
12317 = build_function_type_list (V4SI_type_node,
12318 V4SI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 12319 tree v4si_ftype_v8hi_v8hi
b4de2f7d
AH
12320 = build_function_type_list (V4SI_type_node,
12321 V8HI_type_node, V8HI_type_node, NULL_TREE);
916b60b7 12322 tree di_ftype_v8qi_v8qi
b4de2f7d
AH
12323 = build_function_type_list (long_long_unsigned_type_node,
12324 V8QI_type_node, V8QI_type_node, NULL_TREE);
916b60b7 12325 tree v2di_ftype_v16qi_v16qi
b4de2f7d
AH
12326 = build_function_type_list (V2DI_type_node,
12327 V16QI_type_node, V16QI_type_node, NULL_TREE);
916b60b7 12328 tree int_ftype_v16qi
b4de2f7d 12329 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
47f339cf 12330
bd793c65
BS
12331 /* Add all builtins that are more or less simple operations on two
12332 operands. */
ca7558fc 12333 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
bd793c65
BS
12334 {
12335 /* Use one of the operands; the target can have a different mode for
12336 mask-generating compares. */
12337 enum machine_mode mode;
12338 tree type;
12339
12340 if (d->name == 0)
12341 continue;
12342 mode = insn_data[d->icode].operand[1].mode;
12343
bd793c65
BS
12344 switch (mode)
12345 {
fbe5eb6d
BS
12346 case V16QImode:
12347 type = v16qi_ftype_v16qi_v16qi;
12348 break;
12349 case V8HImode:
12350 type = v8hi_ftype_v8hi_v8hi;
12351 break;
12352 case V4SImode:
12353 type = v4si_ftype_v4si_v4si;
12354 break;
12355 case V2DImode:
12356 type = v2di_ftype_v2di_v2di;
12357 break;
12358 case V2DFmode:
12359 type = v2df_ftype_v2df_v2df;
12360 break;
12361 case TImode:
12362 type = ti_ftype_ti_ti;
12363 break;
bd793c65
BS
12364 case V4SFmode:
12365 type = v4sf_ftype_v4sf_v4sf;
12366 break;
12367 case V8QImode:
12368 type = v8qi_ftype_v8qi_v8qi;
12369 break;
12370 case V4HImode:
12371 type = v4hi_ftype_v4hi_v4hi;
12372 break;
12373 case V2SImode:
12374 type = v2si_ftype_v2si_v2si;
12375 break;
bd793c65
BS
12376 case DImode:
12377 type = di_ftype_di_di;
12378 break;
12379
12380 default:
12381 abort ();
12382 }
0f290768 12383
bd793c65
BS
12384 /* Override for comparisons. */
12385 if (d->icode == CODE_FOR_maskcmpv4sf3
12386 || d->icode == CODE_FOR_maskncmpv4sf3
12387 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12388 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12389 type = v4si_ftype_v4sf_v4sf;
12390
fbe5eb6d
BS
12391 if (d->icode == CODE_FOR_maskcmpv2df3
12392 || d->icode == CODE_FOR_maskncmpv2df3
12393 || d->icode == CODE_FOR_vmmaskcmpv2df3
12394 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12395 type = v2di_ftype_v2df_v2df;
12396
eeb06b1b 12397 def_builtin (d->mask, d->name, type, d->code);
bd793c65
BS
12398 }
12399
12400 /* Add the remaining MMX insns with somewhat more complicated types. */
eeb06b1b
BS
12401 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12402 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12403 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12404 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12405 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12406 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12407 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12408
12409 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12410 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12411 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12412
12413 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12414 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12415
12416 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12417 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
0f290768 12418
bd793c65 12419 /* comi/ucomi insns. */
ca7558fc 12420 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
fbe5eb6d
BS
12421 if (d->mask == MASK_SSE2)
12422 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12423 else
12424 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
bd793c65 12425
1255c85c
BS
12426 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12427 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12428 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
bd793c65 12429
fbe5eb6d
BS
12430 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12431 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12432 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12433 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12434 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12435 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
bd793c65 12436
fbe5eb6d
BS
12437 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12438 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
bd793c65 12439
fbe5eb6d 12440 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
bd793c65 12441
fbe5eb6d
BS
12442 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12443 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12444 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12445 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12446 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12447 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
bd793c65 12448
fbe5eb6d
BS
12449 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12450 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12451 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12452 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
bd793c65 12453
fbe5eb6d
BS
12454 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12455 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12456 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12457 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
bd793c65 12458
fbe5eb6d 12459 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
bd793c65 12460
916b60b7 12461 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
bd793c65 12462
fbe5eb6d
BS
12463 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12464 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12465 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12466 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12467 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12468 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
bd793c65 12469
fbe5eb6d 12470 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
bd793c65 12471
47f339cf
BS
12472 /* Original 3DNow! */
12473 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12474 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12475 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12476 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12477 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12478 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12479 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12480 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12481 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12482 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12483 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12484 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12485 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12486 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12487 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12488 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12489 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12490 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12491 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12492 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
47f339cf
BS
12493
12494 /* 3DNow! extension as used in the Athlon CPU. */
12495 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12496 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12497 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12498 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12499 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12500 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12501
fbe5eb6d
BS
12502 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12503
12504 /* SSE2 */
12505 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12506 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12507
12508 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12509 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12510
12511 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
12512 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
12513 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
12514 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12515 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12516 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12517
12518 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12519 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12520 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12521 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12522
12523 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
916b60b7 12524 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
fbe5eb6d
BS
12525 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12526 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
916b60b7 12527 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
fbe5eb6d
BS
12528
12529 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12530 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12531 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
916b60b7 12532 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
fbe5eb6d
BS
12533
12534 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12535 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12536
12537 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12538
12539 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
916b60b7 12540 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
fbe5eb6d
BS
12541
12542 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12543 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12544 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12545 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12546 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12547
12548 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12549
12550 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12551 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12552
12553 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12554 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12555 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12556
12557 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12558 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12559 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12560
12561 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12562 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12563 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12564 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
12565 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
12566 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12567 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12568
12569 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
12570 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12571 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
916b60b7
BS
12572
12573 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12574 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12575 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12576
12577 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12578 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12579 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12580
12581 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12582 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12583
12584 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12585 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12586 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12587
12588 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12589 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12590 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12591
12592 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12593 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12594
12595 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
bd793c65
BS
12596}
12597
12598/* Errors in the source file can cause expand_expr to return const0_rtx
12599 where we expect a vector. To avoid crashing, use one of the vector
12600 clear instructions. */
12601static rtx
12602safe_vector_operand (x, mode)
12603 rtx x;
12604 enum machine_mode mode;
12605{
12606 if (x != const0_rtx)
12607 return x;
12608 x = gen_reg_rtx (mode);
12609
47f339cf 12610 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
bd793c65
BS
12611 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12612 : gen_rtx_SUBREG (DImode, x, 0)));
12613 else
e37af218
RH
12614 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12615 : gen_rtx_SUBREG (V4SFmode, x, 0)));
bd793c65
BS
12616 return x;
12617}
12618
12619/* Subroutine of ix86_expand_builtin to take care of binop insns. */
12620
12621static rtx
12622ix86_expand_binop_builtin (icode, arglist, target)
12623 enum insn_code icode;
12624 tree arglist;
12625 rtx target;
12626{
12627 rtx pat;
12628 tree arg0 = TREE_VALUE (arglist);
12629 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12630 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12631 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12632 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12633 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12634 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12635
12636 if (VECTOR_MODE_P (mode0))
12637 op0 = safe_vector_operand (op0, mode0);
12638 if (VECTOR_MODE_P (mode1))
12639 op1 = safe_vector_operand (op1, mode1);
12640
12641 if (! target
12642 || GET_MODE (target) != tmode
12643 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12644 target = gen_reg_rtx (tmode);
12645
12646 /* In case the insn wants input operands in modes different from
12647 the result, abort. */
12648 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
12649 abort ();
12650
12651 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12652 op0 = copy_to_mode_reg (mode0, op0);
12653 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12654 op1 = copy_to_mode_reg (mode1, op1);
12655
59bef189
RH
12656 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12657 yet one of the two must not be a memory. This is normally enforced
12658 by expanders, but we didn't bother to create one here. */
12659 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12660 op0 = copy_to_mode_reg (mode0, op0);
12661
bd793c65
BS
12662 pat = GEN_FCN (icode) (target, op0, op1);
12663 if (! pat)
12664 return 0;
12665 emit_insn (pat);
12666 return target;
12667}
12668
12669/* Subroutine of ix86_expand_builtin to take care of stores. */
12670
12671static rtx
e37af218 12672ix86_expand_store_builtin (icode, arglist)
bd793c65
BS
12673 enum insn_code icode;
12674 tree arglist;
bd793c65
BS
12675{
12676 rtx pat;
12677 tree arg0 = TREE_VALUE (arglist);
12678 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12679 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12680 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12681 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12682 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12683
12684 if (VECTOR_MODE_P (mode1))
12685 op1 = safe_vector_operand (op1, mode1);
12686
12687 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
59bef189
RH
12688
12689 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12690 op1 = copy_to_mode_reg (mode1, op1);
12691
bd793c65
BS
12692 pat = GEN_FCN (icode) (op0, op1);
12693 if (pat)
12694 emit_insn (pat);
12695 return 0;
12696}
12697
12698/* Subroutine of ix86_expand_builtin to take care of unop insns. */
12699
12700static rtx
12701ix86_expand_unop_builtin (icode, arglist, target, do_load)
12702 enum insn_code icode;
12703 tree arglist;
12704 rtx target;
12705 int do_load;
12706{
12707 rtx pat;
12708 tree arg0 = TREE_VALUE (arglist);
12709 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12710 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12711 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12712
12713 if (! target
12714 || GET_MODE (target) != tmode
12715 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12716 target = gen_reg_rtx (tmode);
12717 if (do_load)
12718 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12719 else
12720 {
12721 if (VECTOR_MODE_P (mode0))
12722 op0 = safe_vector_operand (op0, mode0);
12723
12724 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12725 op0 = copy_to_mode_reg (mode0, op0);
12726 }
12727
12728 pat = GEN_FCN (icode) (target, op0);
12729 if (! pat)
12730 return 0;
12731 emit_insn (pat);
12732 return target;
12733}
12734
12735/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12736 sqrtss, rsqrtss, rcpss. */
12737
12738static rtx
12739ix86_expand_unop1_builtin (icode, arglist, target)
12740 enum insn_code icode;
12741 tree arglist;
12742 rtx target;
12743{
12744 rtx pat;
12745 tree arg0 = TREE_VALUE (arglist);
59bef189 12746 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
bd793c65
BS
12747 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12748 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12749
12750 if (! target
12751 || GET_MODE (target) != tmode
12752 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12753 target = gen_reg_rtx (tmode);
12754
12755 if (VECTOR_MODE_P (mode0))
12756 op0 = safe_vector_operand (op0, mode0);
12757
12758 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12759 op0 = copy_to_mode_reg (mode0, op0);
fce5a9f2 12760
59bef189
RH
12761 op1 = op0;
12762 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12763 op1 = copy_to_mode_reg (mode0, op1);
fce5a9f2 12764
59bef189 12765 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
12766 if (! pat)
12767 return 0;
12768 emit_insn (pat);
12769 return target;
12770}
12771
/* Subroutine of ix86_expand_builtin to take care of comparison insns.
   D describes the builtin (icode, comparison code, and the swap flag);
   ARGLIST holds the two operands; TARGET, if usable, receives the
   vector mask result.  Returns the result register, or 0 if the insn
   could not be generated.  */

static rtx
ix86_expand_sse_compare (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  /* Guard against const0_rtx produced by erroneous source.  */
  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      /* Copy op1 into a fresh register first: the swapped op1 lands in
	 the operand-1 slot, which must satisfy that operand's (register)
	 predicate.  */
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  /* Reuse TARGET only when its mode and predicate fit the result.  */
  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* Operand 3 of the mask-compare patterns is the comparison rtx itself.  */
  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
12823
/* Subroutine of ix86_expand_builtin to take care of comi insns.
   D describes the builtin; ARGLIST holds the two operands.  The comi
   insn compares the operands and sets EFLAGS; the comparison outcome is
   then materialized into the low byte of a zeroed SImode register,
   which is returned.  The incoming TARGET is ignored.  */

static rtx
ix86_expand_sse_comi (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  /* Guard against const0_rtx produced by erroneous source.  */
  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  /* Zero a fresh SImode register, then view its low byte through a
     QImode SUBREG so only that byte is written below.  */
  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  /* comi patterns have no output register operand; operands 0 and 1 are
     the two inputs.  */
  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  /* Set the low byte of TARGET from the flags register according to the
     comparison, leaving the upper bytes zero.  */
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  gen_rtx_REG (CCmode, FLAGS_REG),
					  const0_rtx)));

  /* Return the full SImode register underlying the QImode SUBREG.  */
  return SUBREG_REG (target);
}
12878
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.

   Builtins needing special expansion are handled by the explicit switch
   below; everything else falls through to the generic bdesc_2arg /
   bdesc_1arg / bdesc_comi table scans at the bottom.  Returns the rtx
   holding the result, or 0 for void builtins (and on pattern failure).  */

rtx
ix86_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    /* No-operand insns: just emit and return void.  */
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;

    /* pextrw: extract a 16-bit element; the selector operand must be
       an immediate accepted by the insn's predicate.  */
    case IX86_BUILTIN_PEXTRW:
    case IX86_BUILTIN_PEXTRW128:
      icode = (fcode == IX86_BUILTIN_PEXTRW
	       ? CODE_FOR_mmx_pextrw
	       : CODE_FOR_sse2_pextrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  /* Return a dummy register so expansion can continue after
	     the error.  */
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    /* pinsrw: insert a 16-bit element at an immediate position.  */
    case IX86_BUILTIN_PINSRW:
    case IX86_BUILTIN_PINSRW128:
      icode = (fcode == IX86_BUILTIN_PINSRW
	       ? CODE_FOR_mmx_pinsrw
	       : CODE_FOR_sse2_pinsrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    /* maskmovq/maskmovdqu: byte-masked store through (%edi)/(%rdi).  */
    case IX86_BUILTIN_MASKMOVQ:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
	       : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    /* Scalar SSE unary ops expanded through the unop1 helper.  */
    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    /* SSE loads/stores expanded through the generic unop/store helpers.  */
    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);

    /* movhps/movlps/movhpd/movlpd loads: combine a register operand
       with a half-width memory operand.  */
    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
	       : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      /* The second argument is a pointer; dereference it.  */
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    /* movhps/movlps/movhpd/movlpd stores: destination operand doubles
       as the insn's input/output memory, hence op0 is passed twice.  */
    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
    case IX86_BUILTIN_STOREHPD:
    case IX86_BUILTIN_STORELPD:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
	       : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
	       : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
	       : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    /* Non-temporal stores.  */
    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    /* ldmxcsr/stmxcsr operate on memory, so bounce the value through a
       stack slot.  */
    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    /* shufps/shufpd: two vector operands plus an immediate mask.  */
    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
	       ? CODE_FOR_sse_shufps
	       : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    /* pshufw/pshufd/pshufhw/pshuflw: one vector operand plus an
       immediate shuffle mask.  */
    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
	       : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    /* 3DNow! builtins.  */
    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_femms ());
      return NULL_RTX;

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);

    /* 3DNow! extensions (Athlon).  */
    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);

    /* Zeroing idioms get dedicated clear patterns.  */
    case IX86_BUILTIN_SSE_ZERO:
      target = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_clrv4sf (target));
      return target;

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    /* SSE2 scalar/packed double loads and stores.  */
    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADAPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);

    case IX86_BUILTIN_STOREAPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);

    case IX86_BUILTIN_LOADSD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);

    case IX86_BUILTIN_STORESD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);

    /* Splat one double into both lanes: spill to a stack slot, load
       it into the low lane, then shufpd with mask 0 to duplicate.  */
    case IX86_BUILTIN_SETPD1:
      target = assign_386_stack_local (DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      emit_move_insn (adjust_address (target, DFmode, 0),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
      emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
      return op0;

    /* Build a V2DF from two doubles via a stack temporary.  */
    case IX86_BUILTIN_SETPD:
      target = assign_386_stack_local (V2DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      emit_move_insn (adjust_address (target, DFmode, 0),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, DFmode, 8),
		      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_movapd (op0, target));
      return op0;

    /* Reversed load: plain load followed by a lane swap (shufpd 1).  */
    case IX86_BUILTIN_LOADRPD:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
					 gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
      return target;

    /* Load one double and broadcast it to both lanes.  */
    case IX86_BUILTIN_LOADPD1:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
					 gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
      return target;

    case IX86_BUILTIN_STOREPD1:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STORERPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);

    case IX86_BUILTIN_MFENCE:
	emit_insn (gen_sse2_mfence ());
	return 0;
    case IX86_BUILTIN_LFENCE:
	emit_insn (gen_sse2_lfence ());
	return 0;

    case IX86_BUILTIN_CLFLUSH:
	arg0 = TREE_VALUE (arglist);
	op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
	icode = CODE_FOR_sse2_clflush;
	mode0 = insn_data[icode].operand[0].mode;
	if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	    op0 = copy_to_mode_reg (mode0, op0);

	emit_insn (gen_sse2_clflush (op0));
	return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    default:
      break;
    }

  /* Table-driven expansion for the regular two-operand builtins.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_maskcmpv4sf3
	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_maskncmpv4sf3
	    || d->icode == CODE_FOR_vmmaskncmpv4sf3
	    || d->icode == CODE_FOR_maskcmpv2df3
	    || d->icode == CODE_FOR_vmmaskcmpv2df3
	    || d->icode == CODE_FOR_maskncmpv2df3
	    || d->icode == CODE_FOR_vmmaskncmpv2df3)
	  return ix86_expand_sse_compare (d, arglist, target);

	return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  /* ... then the one-operand builtins ...  */
  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  /* ... and finally the comi/ucomi comparisons.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}
4211a8fb
JH
13375
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.

   Returns a MEM rtx through which the stored value can be reloaded.
   Three strategies, depending on target:
     - 64-bit with a red zone: store below the stack pointer (no
       adjustment needed, the red zone is safe scratch space);
     - 64-bit without a red zone: push the value (widened to DImode);
     - 32-bit: push as one or two SImode words (DImode is split).
   The matching stack adjustment is undone by ix86_free_from_memory,
   which must mirror the sizes chosen here.  */
rtx
ix86_force_to_memory (mode, operand)
     enum machine_mode mode;
     rtx operand;
{
  rtx result;
  if (!reload_completed)
    abort ();
  if (TARGET_64BIT && TARGET_RED_ZONE)
    {
      /* Red zone: scratch memory below the stack pointer is guaranteed
	 not to be clobbered, so no push/pop is needed.  */
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT && !TARGET_RED_ZONE)
    {
      switch (mode)
	{
	case HImode:
	case SImode:
	  /* Narrow modes are pushed as a full 64-bit word.  */
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
	    gen_rtx_SET (VOIDmode,
			 gen_rtx_MEM (DImode,
				      gen_rtx_PRE_DEC (DImode,
						       stack_pointer_rtx)),
			 operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
	{
	case DImode:
	  {
	    /* 32-bit DImode: push high word first, then low word, so
	       the value ends up in memory order at the stack pointer.  */
	    rtx operands[2];
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (
	      gen_rtx_SET (VOIDmode,
			   gen_rtx_MEM (SImode,
					gen_rtx_PRE_DEC (Pmode,
							 stack_pointer_rtx)),
			   operands[1]));
	    emit_insn (
	      gen_rtx_SET (VOIDmode,
			   gen_rtx_MEM (SImode,
					gen_rtx_PRE_DEC (Pmode,
							 stack_pointer_rtx)),
			   operands[0]));
	  }
	  break;
	case HImode:
	  /* It is better to store HImodes as SImodes.  */
	  if (!TARGET_PARTIAL_REG_STALL)
	    operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
	    gen_rtx_SET (VOIDmode,
			 gen_rtx_MEM (GET_MODE (operand),
				      gen_rtx_PRE_DEC (SImode,
						       stack_pointer_rtx)),
			 operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
13457
/* Free operand from the memory.

   Pops the stack space pushed by ix86_force_to_memory for MODE.  The
   size computation must match the push strategy used there: nothing to
   do when the red zone was used; otherwise 8 bytes on 64-bit or for
   DImode, 2 bytes only for HImode with partial-register stalls (where
   force_to_memory kept HImode), and 4 bytes in the remaining cases.  */
void
ix86_free_from_memory (mode)
     enum machine_mode mode;
{
  if (!TARGET_64BIT || !TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
	size = 8;
      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
	size = 2;
      else
	size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
	 to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (size))));
    }
}
a946dd00 13480
f84aa48a
JH
/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.

   X is the value being reloaded, CLASS the class reload proposes.
   Returns the (possibly narrower) class to use, or NO_REGS when the
   value must go through memory instead of a register of CLASS.  */
enum reg_class
ix86_preferred_reload_class (x, class)
     rtx x;
     enum reg_class class;
{
  /* Non-zero vector constants can't be loaded directly into any class.  */
  if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
    return NO_REGS;
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* SSE can't load any constant directly yet.  */
      if (SSE_CLASS_P (class))
	return NO_REGS;
      /* Floats can load 0 and 1.  */
      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
	{
	  /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
	  if (MAYBE_SSE_CLASS_P (class))
	    return (reg_class_subset_p (class, GENERAL_REGS)
		    ? GENERAL_REGS : FLOAT_REGS);
	  else
	    return class;
	}
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
	return GENERAL_REGS;
      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
	return NO_REGS;
    }
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
    return Q_REGS;
  return class;
}
13520
13521/* If we are copying between general and FP registers, we need a memory
13522 location. The same is true for SSE and MMX registers.
13523
13524 The macro can't work reliably when one of the CLASSES is class containing
13525 registers from multiple units (SSE, MMX, integer). We avoid this by never
13526 combining those units in single alternative in the machine description.
13527 Ensure that this constraint holds to avoid unexpected surprises.
13528
13529 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13530 enforce these sanity checks. */
13531int
13532ix86_secondary_memory_needed (class1, class2, mode, strict)
13533 enum reg_class class1, class2;
13534 enum machine_mode mode;
13535 int strict;
13536{
13537 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13538 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13539 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13540 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13541 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13542 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13543 {
13544 if (strict)
13545 abort ();
13546 else
13547 return 1;
13548 }
13549 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13550 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13551 && (mode) != SImode)
13552 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13553 && (mode) != SImode));
13554}
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */
int
ix86_register_move_cost (mode, class1, class2)
     enum machine_mode mode;
     enum reg_class class1, class2;
{
  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      /* Charge the worse of the in/out memory costs in each direction.  */
      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
		   MEMORY_MOVE_COST (mode, class1, 1));
      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
		   MEMORY_MOVE_COST (mode, class2, 1));

      /* In case of copying from general_purpose_register we may emit multiple
	 stores followed by single load causing memory size mismatch stall.
	 Count this as arbitarily high cost of 20.  */
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
	cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
	 have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
	  || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
	cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  /* Moves within one unit use that unit's per-processor cost.  */
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
13606
a946dd00
JH
/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
int
ix86_hard_regno_mode_ok (regno, mode)
     int regno;
     enum machine_mode mode;
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  /* Each special register file accepts only its own valid mode set.  */
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    return VALID_SSE_REG_MODE (mode);
  if (MMX_REGNO_P (regno))
    return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
  /* We handle both integer and floats in the general purpose registers.
     In future we should be able to handle vector modes as well.  */
  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
    return 0;
  /* Take care for QImode values - they can be in non-QI regs, but then
     they do cause partial register stalls.  */
  if (regno < 4 || mode != QImode || TARGET_64BIT)
    return 1;
  /* QImode in a non-QI reg (esi/edi/ebp/esp on 32-bit): allow only when
     reload is active or partial-register stalls are not a concern.  */
  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
}
fa79946e
JH
13636
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.

   MODE is the mode being moved, CLASS the register class on the
   register side, and IN is nonzero for a load (memory -> register),
   zero for a store.  Costs come from the per-processor ix86_cost
   tables; 100 is returned for mode sizes a unit cannot handle.  */
int
ix86_memory_move_cost (mode, class, in)
     enum machine_mode mode;
     enum reg_class class;
     int in;
{
  /* x87 loads/stores, indexed by SF/DF/XF-or-TF width.  */
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
	{
	case SFmode:
	  index = 0;
	  break;
	case DFmode:
	  index = 1;
	  break;
	case XFmode:
	case TFmode:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  /* SSE loads/stores, indexed by 4/8/16-byte size.  */
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	case 16:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  /* MMX loads/stores, indexed by 4/8-byte size.  */
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  /* Integer registers, by operand size.  */
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      /* Byte loads from non-Q regs need movzbl; byte stores to non-Q
	 regs are penalized by a flat 4.  */
      if (in)
	return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
		: ix86_cost->movzbl_load);
      else
	return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
		: ix86_cost->int_store[0] + 4);
      break;
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
	mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
	      * ((int) GET_MODE_SIZE (mode)
		 + UNITS_PER_WORD -1 ) / UNITS_PER_WORD);
    }
}
0ecf09f9 13730
#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
/* Record SYMBOL as a global constructor: switch to the init section
   and emit a pushl of its address there.  */
static void
ix86_svr3_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority ATTRIBUTE_UNUSED;
{
  init_section ();
  fprintf (asm_out_file, "\tpushl $");
  assemble_name (asm_out_file, XSTR (symbol, 0));
  putc ('\n', asm_out_file);
}
#endif
162f023b 13743
b069de3b
SS
#if TARGET_MACHO

/* Counter used to generate unique local labels (LPC$n / Ln$lz) for
   each stub emitted.  */
static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (file, symb, stub)
     FILE *file;
     const char *symb, *stub;
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  /* Derive the binder and symbol names; the +32 slack is presumably
     for the decoration the GEN_* macros add -- TODO confirm against
     their definitions in the Darwin headers.  */
  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  /* Local label naming the lazy pointer slot for this stub.  */
  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      /* PIC stub: materialize the pic base in %eax via call/pop, then
	 jump through the lazy pointer relative to it.  */
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  /* Binder fragment: push the address of the lazy pointer and enter
     dyld_stub_binding_helper so the first call resolves the symbol.  */
  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  /* The lazy pointer itself, initialized to the binder so the first
     use triggers binding.  */
  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
#endif /* TARGET_MACHO */
13808
162f023b
JH
13809/* Order the registers for register allocator. */
13810
13811void
13812x86_order_regs_for_local_alloc ()
13813{
13814 int pos = 0;
13815 int i;
13816
13817 /* First allocate the local general purpose registers. */
13818 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13819 if (GENERAL_REGNO_P (i) && call_used_regs[i])
13820 reg_alloc_order [pos++] = i;
13821
13822 /* Global general purpose registers. */
13823 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13824 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
13825 reg_alloc_order [pos++] = i;
13826
13827 /* x87 registers come first in case we are doing FP math
13828 using them. */
13829 if (!TARGET_SSE_MATH)
13830 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13831 reg_alloc_order [pos++] = i;
fce5a9f2 13832
162f023b
JH
13833 /* SSE registers. */
13834 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
13835 reg_alloc_order [pos++] = i;
13836 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
13837 reg_alloc_order [pos++] = i;
13838
13839 /* x87 registerts. */
13840 if (TARGET_SSE_MATH)
13841 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13842 reg_alloc_order [pos++] = i;
13843
13844 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
13845 reg_alloc_order [pos++] = i;
13846
13847 /* Initialize the rest of array as we do not allocate some registers
13848 at all. */
13849 while (pos < FIRST_PSEUDO_REGISTER)
13850 reg_alloc_order [pos++] = 0;
13851}
194734e9
JH
13852
/* Output to FILE the assembly for a multiple-inheritance thunk for
   FUNCTION: add DELTA to the incoming `this' pointer, then tail-jump
   to FUNCTION.  */
void
x86_output_mi_thunk (file, delta, function)
     FILE *file;
     int delta;
     tree function;
{
  tree parm;
  rtx xops[3];

  /* Walk the prototype's argument list looking for a terminating
     void; a non-NULL PARM afterwards means the prototype is complete,
     which -- with regparm -- presumably implies `this' arrives in a
     register rather than on the stack (TODO confirm against the
     regparm argument-passing rules).  */
  if (ix86_regparm > 0)
    parm = TYPE_ARG_TYPES (TREE_TYPE (function));
  else
    parm = NULL_TREE;
  for (; parm; parm = TREE_CHAIN (parm))
    if (TREE_VALUE (parm) == void_type_node)
      break;

  xops[0] = GEN_INT (delta);
  if (TARGET_64BIT)
    {
      /* `this' is the first integer parameter register, shifted by one
	 slot when the return value is an aggregate passed by hidden
	 reference.  */
      int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
      xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
      output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
      if (flag_pic)
	{
	  /* PIC tail call goes through the GOT.  */
	  fprintf (file, "\tjmp *");
	  assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
	  fprintf (file, "@GOTPCREL(%%rip)\n");
	}
      else
	{
	  fprintf (file, "\tjmp ");
	  assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
	  fprintf (file, "\n");
	}
    }
  else
    {
      /* 32-bit: `this' is either in register 0 (%eax, register
	 calling convention) or on the stack -- one slot further down
	 when an aggregate return pointer occupies the first slot.  */
      if (parm)
	xops[1] = gen_rtx_REG (SImode, 0);
      else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
	xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
      else
	xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
      output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);

      if (flag_pic)
	{
	  /* Compute the GOT base into the PIC register with the usual
	     call/pop sequence, load FUNCTION's address from the GOT
	     into %ecx, and jump through it.  %ebx is saved and
	     restored around the sequence.  */
	  xops[0] = pic_offset_table_rtx;
	  xops[1] = gen_label_rtx ();
	  xops[2] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

	  if (ix86_regparm > 2)
	    abort ();
	  output_asm_insn ("push{l}\t%0", xops);
	  output_asm_insn ("call\t%P1", xops);
	  ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
	  output_asm_insn ("pop{l}\t%0", xops);
	  output_asm_insn
	    ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
	  xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
	  output_asm_insn
	    ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
	  asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
	  asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
	}
      else
	{
	  fprintf (file, "\tjmp ");
	  assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
	  fprintf (file, "\n");
	}
    }
}
e2500fed 13927
e932b21b
JH
13928int
13929x86_field_alignment (field, computed)
13930 tree field;
13931 int computed;
13932{
13933 enum machine_mode mode;
ad9335eb
JJ
13934 tree type = TREE_TYPE (field);
13935
13936 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
e932b21b 13937 return computed;
ad9335eb
JJ
13938 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
13939 ? get_inner_array_type (type) : type);
39e3a681
JJ
13940 if (mode == DFmode || mode == DCmode
13941 || GET_MODE_CLASS (mode) == MODE_INT
13942 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
e932b21b
JH
13943 return MIN (32, computed);
13944 return computed;
13945}
13946
2a500b9e
JH
13947/* Implement machine specific optimizations.
13948 At the moment we implement single transformation: AMD Athlon works faster
13949 when RET is not destination of conditional jump or directly preceeded
13950 by other jump instruction. We avoid the penalty by inserting NOP just
13951 before the RET instructions in such cases. */
13952void
13953x86_machine_dependent_reorg (first)
13954 rtx first ATTRIBUTE_UNUSED;
13955{
13956 edge e;
13957
13958 if (!TARGET_ATHLON || !optimize || optimize_size)
13959 return;
13960 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
13961 {
13962 basic_block bb = e->src;
13963 rtx ret = bb->end;
13964 rtx prev;
13965 bool insert = false;
13966
13967 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
13968 continue;
13969 prev = prev_nonnote_insn (ret);
13970 if (prev && GET_CODE (prev) == CODE_LABEL)
13971 {
13972 edge e;
13973 for (e = bb->pred; e; e = e->pred_next)
13974 if (EDGE_FREQUENCY (e) && e->src->index > 0
13975 && !(e->flags & EDGE_FALLTHRU))
13976 insert = 1;
13977 }
13978 if (!insert)
13979 {
13980 prev = prev_real_insn (ret);
13981 if (prev && GET_CODE (prev) == JUMP_INSN
13982 && any_condjump_p (prev))
13983 insert = 1;
13984 }
13985 if (insert)
13986 emit_insn_before (gen_nop (), ret);
13987 }
13988}
13989
e2500fed 13990#include "gt-i386.h"
This page took 3.421065 seconds and 5 git commands to generate.