diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index bb0b890..d9dc571 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -7062,11 +7062,8 @@ static void
 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
 {
   rtx save_area, mem;
-  rtx label;
-  rtx tmp_reg;
-  rtx nsse_reg;
   alias_set_type set;
-  int i;
+  int i, max;
 
   /* GPR size of varargs save area.  */
   if (cfun->va_list_gpr_size)
@@ -7087,10 +7084,11 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
     save_area = frame_pointer_rtx;
   set = get_varargs_alias_set ();
 
-  for (i = cum->regno;
-       i < X86_64_REGPARM_MAX
-       && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
-       i++)
+  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
+  if (max > X86_64_REGPARM_MAX)
+    max = X86_64_REGPARM_MAX;
+
+  for (i = cum->regno; i < max; i++)
     {
       mem = gen_rtx_MEM (Pmode,
                          plus_constant (save_area, i * UNITS_PER_WORD));
@@ -7102,33 +7100,41 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
 
   if (ix86_varargs_fpr_size)
     {
+      enum machine_mode smode;
+      rtx label, test;
+
       /* Now emit code to save SSE registers.  The AX parameter contains number
-         of SSE parameter registers used to call this function.  We use
-         sse_prologue_save insn template that produces computed jump across
-         SSE saves.  We need some preparation work to get this working.  */
+         of SSE parameter registers used to call this function, though all we
+         actually check here is the zero/non-zero status.  */
 
       label = gen_label_rtx ();
+      test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
+      emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
+                                      label));
+
+      /* If we've determined that we're only loading scalars (and not
+         vector data) then we can store doubles instead.  */
+      if (crtl->stack_alignment_needed < 128)
+        smode = DFmode;
+      else
+        smode = V4SFmode;
 
-      nsse_reg = gen_reg_rtx (Pmode);
-      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
-
-      /* Compute address of memory block we save into.  We always use pointer
-         pointing 127 bytes after first byte to store - this is needed to keep
-         instruction size limited by 4 bytes (5 bytes for AVX) with one
-         byte displacement.  */
-      tmp_reg = gen_reg_rtx (Pmode);
-      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
-                              plus_constant (save_area,
-                                             ix86_varargs_gpr_size + 127)));
-      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
-      MEM_NOTRAP_P (mem) = 1;
-      set_mem_alias_set (mem, set);
-      set_mem_align (mem, 64);
+      max = cum->sse_regno + cfun->va_list_fpr_size / 16;
+      if (max > X86_64_SSE_REGPARM_MAX)
+        max = X86_64_SSE_REGPARM_MAX;
 
-      /* And finally do the dirty job!  */
-      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
-                                        GEN_INT (cum->sse_regno), label,
-                                        gen_reg_rtx (Pmode)));
+      for (i = cum->sse_regno; i < max; ++i)
+        {
+          mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
+          mem = gen_rtx_MEM (smode, mem);
+          MEM_NOTRAP_P (mem) = 1;
+          set_mem_alias_set (mem, set);
+          set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
+
+          emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
+        }
+
+      emit_label (label);
     }
 }
 
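Note on the i386.c hunk above: the computed-jump scheme is replaced by a plain test of AL (which the x86-64 SysV ABI defines as the number of vector registers used by a varargs call) followed by one store per SSE register slot. The sketch below is a minimal, standalone C model of that control flow, written only for this annotation; it uses plain integers and memset in place of RTL emission, and every name in it (model_varargs_save, num_gp_named, al_value, and so on) is illustrative rather than taken from GCC. The constants 6, 8, and the 16-byte slot stride follow the x86-64 ABI register save area that the real code targets.

/* Standalone sketch for this annotation only; names are illustrative
   and do not appear in GCC.  */
#include <stdio.h>
#include <string.h>

#define X86_64_REGPARM_MAX      6   /* rdi, rsi, rdx, rcx, r8, r9 */
#define X86_64_SSE_REGPARM_MAX  8   /* xmm0 .. xmm7 */
#define UNITS_PER_WORD          8

/* Model of the register save area: 6 * 8 bytes of GPR slots followed
   by 8 * 16 bytes of SSE slots, as laid out by the x86-64 ABI.  */
static void
model_varargs_save (unsigned char *save_area,
                    int num_gp_named, int num_sse_named, int al_value,
                    int va_list_gpr_size, int va_list_fpr_size,
                    int need_vector_align)
{
  int i, max;
  int gpr_area = X86_64_REGPARM_MAX * UNITS_PER_WORD;

  /* GPR slots: the patch clamps the bound once ('max') instead of
     testing two conditions on every loop iteration.  */
  max = num_gp_named + va_list_gpr_size / UNITS_PER_WORD;
  if (max > X86_64_REGPARM_MAX)
    max = X86_64_REGPARM_MAX;
  for (i = num_gp_named; i < max; i++)
    memset (save_area + i * UNITS_PER_WORD, 0xAA, UNITS_PER_WORD);

  /* SSE slots: skipped entirely when AL is zero; otherwise one store
     per slot.  The slots stay 16 bytes apart, but only 8 bytes are
     written per slot when the DFmode (movsd) form is chosen.  */
  if (al_value != 0)
    {
      int store_size = need_vector_align ? 16 : 8;

      max = num_sse_named + va_list_fpr_size / 16;
      if (max > X86_64_SSE_REGPARM_MAX)
        max = X86_64_SSE_REGPARM_MAX;
      for (i = num_sse_named; i < max; i++)
        memset (save_area + gpr_area + i * 16, 0xBB, store_size);
    }
}

int
main (void)
{
  unsigned char area[6 * 8 + 8 * 16] = { 0 };

  /* One named GPR argument, no named SSE arguments, AL = 1.  */
  model_varargs_save (area, 1, 0, 1, sizeof area, 8 * 16, 0);
  printf ("GPR slots saved: 1..5, SSE slots saved: 0..7 (8 bytes each)\n");
  return 0;
}

The offsets written here mirror the RTL above: GPR slot i at i * UNITS_PER_WORD and SSE slot i at ix86_varargs_gpr_size + i * 16 within the save area.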
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 88b4029..6616da2 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -79,13 +79,11 @@
   ;; Prologue support
   UNSPEC_STACK_ALLOC
   UNSPEC_SET_GOT
-  UNSPEC_SSE_PROLOGUE_SAVE
   UNSPEC_REG_SAVE
   UNSPEC_DEF_CFA
   UNSPEC_SET_RIP
   UNSPEC_SET_GOT_OFFSET
   UNSPEC_MEMORY_BLOCKAGE
-  UNSPEC_SSE_PROLOGUE_SAVE_LOW
 
   ;; TLS support
   UNSPEC_TP
@@ -17825,179 +17823,6 @@
   { return ASM_SHORT "0x0b0f"; }
   [(set_attr "length" "2")])
 
-(define_expand "sse_prologue_save"
-  [(parallel [(set (match_operand:BLK 0 "" "")
-                   (unspec:BLK [(reg:DI XMM0_REG)
-                                (reg:DI XMM1_REG)
-                                (reg:DI XMM2_REG)
-                                (reg:DI XMM3_REG)
-                                (reg:DI XMM4_REG)
-                                (reg:DI XMM5_REG)
-                                (reg:DI XMM6_REG)
-                                (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
-              (clobber (reg:CC FLAGS_REG))
-              (clobber (match_operand:DI 1 "register_operand" ""))
-              (use (match_operand:DI 2 "immediate_operand" ""))
-              (use (label_ref:DI (match_operand 3 "" "")))
-              (clobber (match_operand:DI 4 "register_operand" ""))
-              (use (match_dup 1))])]
-  "TARGET_64BIT"
-  "")
-
-;; Pre-reload version of prologue save.  Until after prologue generation we don't know
-;; what the size of save instruction will be.
-;; Operand 0+operand 6 is the memory save area
-;; Operand 1 is number of registers to save (will get overwritten to operand 5)
-;; Operand 2 is number of non-vaargs SSE arguments
-;; Operand 3 is label starting the save block
-;; Operand 4 is used for temporary computation of jump address
-(define_insn "*sse_prologue_save_insn1"
-  [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
-                          (match_operand:DI 6 "const_int_operand" "n")))
-        (unspec:BLK [(reg:DI XMM0_REG)
-                     (reg:DI XMM1_REG)
-                     (reg:DI XMM2_REG)
-                     (reg:DI XMM3_REG)
-                     (reg:DI XMM4_REG)
-                     (reg:DI XMM5_REG)
-                     (reg:DI XMM6_REG)
-                     (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
-   (clobber (reg:CC FLAGS_REG))
-   (clobber (match_operand:DI 1 "register_operand" "=r"))
-   (use (match_operand:DI 2 "const_int_operand" "i"))
-   (use (label_ref:DI (match_operand 3 "" "X")))
-   (clobber (match_operand:DI 4 "register_operand" "=&r"))
-   (use (match_operand:DI 5 "register_operand" "1"))]
-  "TARGET_64BIT
-   && INTVAL (operands[6]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128
-   && INTVAL (operands[6]) + INTVAL (operands[2]) * 16 >= -128"
-  "#"
-  [(set_attr "type" "other")
-   (set_attr "memory" "store")
-   (set_attr "mode" "DI")])
-
-;; We know size of save instruction; expand the computation of jump address
-;; in the jumptable.
-(define_split
-  [(parallel [(set (match_operand:BLK 0 "" "")
-                   (unspec:BLK [(reg:DI XMM0_REG)
-                                (reg:DI XMM1_REG)
-                                (reg:DI XMM2_REG)
-                                (reg:DI XMM3_REG)
-                                (reg:DI XMM4_REG)
-                                (reg:DI XMM5_REG)
-                                (reg:DI XMM6_REG)
-                                (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
-              (clobber (reg:CC FLAGS_REG))
-              (clobber (match_operand:DI 1 "register_operand" ""))
-              (use (match_operand:DI 2 "const_int_operand" ""))
-              (use (match_operand 3 "" ""))
-              (clobber (match_operand:DI 4 "register_operand" ""))
-              (use (match_operand:DI 5 "register_operand" ""))])]
-  "reload_completed"
-  [(parallel [(set (match_dup 0)
-                   (unspec:BLK [(reg:DI XMM0_REG)
-                                (reg:DI XMM1_REG)
-                                (reg:DI XMM2_REG)
-                                (reg:DI XMM3_REG)
-                                (reg:DI XMM4_REG)
-                                (reg:DI XMM5_REG)
-                                (reg:DI XMM6_REG)
-                                (reg:DI XMM7_REG)]
-                               UNSPEC_SSE_PROLOGUE_SAVE_LOW))
-              (use (match_dup 1))
-              (use (match_dup 2))
-              (use (match_dup 3))
-              (use (match_dup 5))])]
-{
-  /* Movaps is 4 bytes, AVX and movsd is 5 bytes.  */
-  int size = 4 + (TARGET_AVX || crtl->stack_alignment_needed < 128);
-
-  /* Compute address to jump to:
-     label - eax*size + nnamed_sse_arguments*size. */
-  if (size == 5)
-    emit_insn (gen_rtx_SET (VOIDmode, operands[4],
-                            gen_rtx_PLUS
-                              (Pmode,
-                               gen_rtx_MULT (Pmode, operands[1],
-                                             GEN_INT (4)),
-                               operands[1])));
-  else if (size == 4)
-    emit_insn (gen_rtx_SET (VOIDmode, operands[4],
-                            gen_rtx_MULT (Pmode, operands[1],
-                                          GEN_INT (4))));
-  else
-    gcc_unreachable ();
-  if (INTVAL (operands[2]))
-    emit_move_insn
-      (operands[1],
-       gen_rtx_CONST (DImode,
-                      gen_rtx_PLUS (DImode,
-                                    operands[3],
-                                    GEN_INT (INTVAL (operands[2])
-                                             * size))));
-  else
-    emit_move_insn (operands[1], operands[3]);
-  emit_insn (gen_subdi3 (operands[1], operands[1], operands[4]));
-  operands[5] = GEN_INT (size);
-})
-
-(define_insn "sse_prologue_save_insn"
-  [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
-                          (match_operand:DI 4 "const_int_operand" "n")))
-        (unspec:BLK [(reg:DI XMM0_REG)
-                     (reg:DI XMM1_REG)
-                     (reg:DI XMM2_REG)
-                     (reg:DI XMM3_REG)
-                     (reg:DI XMM4_REG)
-                     (reg:DI XMM5_REG)
-                     (reg:DI XMM6_REG)
-                     (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE_LOW))
-   (use (match_operand:DI 1 "register_operand" "r"))
-   (use (match_operand:DI 2 "const_int_operand" "i"))
-   (use (label_ref:DI (match_operand 3 "" "X")))
-   (use (match_operand:DI 5 "const_int_operand" "i"))]
-  "TARGET_64BIT
-   && INTVAL (operands[4]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128
-   && INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128"
-{
-  int i;
-  operands[0] = gen_rtx_MEM (Pmode,
-                             gen_rtx_PLUS (Pmode, operands[0], operands[4]));
-  /* VEX instruction with a REX prefix will #UD.  */
-  if (TARGET_AVX && GET_CODE (XEXP (operands[0], 0)) != PLUS)
-    gcc_unreachable ();
-
-  output_asm_insn ("jmp\t%A1", operands);
-  for (i = X86_64_SSE_REGPARM_MAX - 1; i >= INTVAL (operands[2]); i--)
-    {
-      operands[4] = adjust_address (operands[0], DImode, i*16);
-      operands[5] = gen_rtx_REG (TImode, SSE_REGNO (i));
-      PUT_MODE (operands[4], TImode);
-      if (GET_CODE (XEXP (operands[0], 0)) != PLUS)
-        output_asm_insn ("rex", operands);
-      if (crtl->stack_alignment_needed < 128)
-        output_asm_insn ("%vmovsd\t{%5, %4|%4, %5}", operands);
-      else
-        output_asm_insn ("%vmovaps\t{%5, %4|%4, %5}", operands);
-    }
-  targetm.asm_out.internal_label (asm_out_file, "L",
-                                  CODE_LABEL_NUMBER (operands[3]));
-  return "";
-}
-  [(set_attr "type" "other")
-   (set_attr "length_immediate" "0")
-   (set_attr "length_address" "0")
-   ;; 2 bytes for jump and opernds[4] bytes for each save.
-   (set (attr "length")
-     (plus (const_int 2)
-           (mult (symbol_ref ("INTVAL (operands[5])"))
-                 (symbol_ref ("X86_64_SSE_REGPARM_MAX - INTVAL (operands[2])")))))
-   (set_attr "memory" "store")
-   (set_attr "modrm" "0")
-   (set_attr "prefix" "maybe_vex")
-   (set_attr "mode" "DI")])
-
 (define_expand "prefetch"
   [(prefetch (match_operand 0 "address_operand" "")
              (match_operand:SI 1 "const_int_operand" "")
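For reference, the deleted i386.md patterns implemented the old scheme: emit a block of fixed-size stores (4 bytes each for movaps, 5 for AVX or movsd), jump into it at label - (eax - named) * size so that, in effect, only the registers that might hold variadic arguments (named .. AL-1) were stored, and declare the block's length as 2 + size * (X86_64_SSE_REGPARM_MAX - named). The short C model below reproduces only that arithmetic; it is a sketch written for this note, and the names jump_target_offset and insn_length are illustrative, not GCC code.

/* Standalone sketch for this annotation only; these helpers do not
   exist in GCC.  */
#include <stdio.h>

#define X86_64_SSE_REGPARM_MAX 8

/* Byte offset of the computed jump target relative to the label that
   follows the save block (from the deleted define_split:
   target = label + named * size - eax * size).  */
static int
jump_target_offset (int eax, int named, int size)
{
  return (named - eax) * size;
}

/* The deleted pattern's "length" attribute: 2 bytes for the jmp plus
   one fixed-size store per register from 'named' up to xmm7.  */
static int
insn_length (int named, int size)
{
  return 2 + size * (X86_64_SSE_REGPARM_MAX - named);
}

int
main (void)
{
  int size = 4;   /* movaps; 5 for AVX (vmovaps) or movsd */
  int named = 1;  /* SSE registers consumed by named arguments */
  int eax;

  printf ("save block length: %d bytes\n", insn_length (named, size));
  for (eax = named; eax <= X86_64_SSE_REGPARM_MAX; eax++)
    {
      if (eax == named)
        printf ("AL = %d: jump to label%+d, nothing stored\n",
                eax, jump_target_offset (eax, named, size));
      else
        printf ("AL = %d: jump to label%+d, storing xmm%d..xmm%d\n",
                eax, jump_target_offset (eax, named, size), named, eax - 1);
    }
  return 0;
}

The replacement code in i386.c gives up this per-call precision (it may store registers that carry no argument) in exchange for ordinary moves with no runtime computed jump and no special-cased instruction sizing.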