Index: gcc.fsf.stkalign2/gcc/doc/extend.texi =================================================================== *** gcc.fsf.stkalign2/gcc/doc/extend.texi (revision 110663) --- gcc.fsf.stkalign2/gcc/doc/extend.texi (working copy) *************** SSE registers instead of on the stack. *** 2207,2212 **** --- 2207,2226 ---- variable number of arguments will continue to pass all of their floating point arguments on the stack. + @item force_align_arg_pointer + @cindex @code{force_align_arg_pointer} attribute + On the Intel x86, the @code{force_align_arg_pointer} attribute may be + applied to individual function definitions, generating an alternate + prologue and epilogue that realign the runtime stack. This supports + mixing legacy codes that run with a 4-byte aligned stack with modern + codes that keep a 16-byte aligned stack for SSE compatibility. The alternate + prologue/epilogue is slower and bigger than the regular one, and it + requires one dedicated register for the life of the function. This + also lowers the number of registers available if used in conjunction + with the @code{regparm} attribute. The @code{force_align_arg_pointer} + attribute is incompatible with nested functions; this is considered a + hard error. + @item returns_twice @cindex @code{returns_twice} attribute The @code{returns_twice} attribute tells the compiler that a function may Index: gcc.fsf.stkalign2/gcc/doc/invoke.texi =================================================================== *** gcc.fsf.stkalign2/gcc/doc/invoke.texi (revision 110663) --- gcc.fsf.stkalign2/gcc/doc/invoke.texi (working copy) *************** Objective-C and Objective-C++ Dialects}. 
*** 532,537 **** --- 532,538 ---- -mthreads -mno-align-stringops -minline-all-stringops @gol -mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol -m96bit-long-double -mregparm=@var{num} -msseregparm @gol + -mstackrealign @gol -momit-leaf-frame-pointer -mno-red-zone -mno-tls-direct-seg-refs @gol -mcmodel=@var{code-model} @gol -m32 -m64 -mlarge-data-threshold=@var{num}} *************** function by using the function attribute *** 9379,9384 **** --- 9380,9399 ---- modules with the same value, including any libraries. This includes the system libraries and startup modules. + @item -mstackrealign + @opindex mstackrealign + Realign the stack at entry. On the Intel x86, the + @option{-mstackrealign} option will generate an alternate + prologue/epilogue that realigns the runtime stack. This supports + mixing legacy codes that keep a 4-byte aligned stack with modern codes + that keep a 16-byte aligned stack for SSE compatibility. The alternate + prologue and epilogue are slower and bigger than the regular ones, and + they require one dedicated register for the entire function. This + also lowers the number of registers available if used in conjunction + with the @code{regparm} attribute. For nested functions, + @option{-mstackrealign} is ignored with a warning, while the + @code{force_align_arg_pointer} attribute is a hard error. 
+ @item -mpreferred-stack-boundary=@var{num} @opindex mpreferred-stack-boundary Attempt to keep the stack boundary aligned to a 2 raised to @var{num} Index: gcc.fsf.stkalign2/gcc/config/i386/i386.opt =================================================================== *** gcc.fsf.stkalign2/gcc/config/i386/i386.opt (revision 110663) --- gcc.fsf.stkalign2/gcc/config/i386/i386.opt (working copy) *************** msselibm *** 205,210 **** --- 205,214 ---- Target Mask(SSELIBM) Use SSE2 ABI libgcc-math routines if using SSE math + mstackrealign + Target Report Var(ix86_force_align_arg_pointer) + Realign stack in prologue + msvr3-shlib Target Report Mask(SVR3_SHLIB) Uninitialized locals in .bss Index: gcc.fsf.stkalign2/gcc/config/i386/i386.c =================================================================== *** gcc.fsf.stkalign2/gcc/config/i386/i386.c (revision 110663) --- gcc.fsf.stkalign2/gcc/config/i386/i386.c (working copy) *************** int x86_prefetch_sse; *** 1036,1041 **** --- 1036,1045 ---- /* ix86_regparm_string as a number */ static int ix86_regparm; + /* -mstackrealign option */ + extern int ix86_force_align_arg_pointer; + static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer"; + /* Preferred alignment for stack boundary in bits. */ unsigned int ix86_preferred_stack_boundary; *************** const struct attribute_spec ix86_attribu *** 2202,2207 **** --- 2206,2214 ---- /* Sseregparm attribute says we are using x86_64 calling conventions for FP arguments. */ { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute }, + /* force_align_arg_pointer says this function realigns the stack at entry. 
*/ + { (const char *)&ix86_force_align_arg_pointer_string, 0, 0, + false, true, true, ix86_handle_cconv_attribute }, #if TARGET_DLLIMPORT_DECL_ATTRIBUTES { "dllimport", 0, 0, false, false, false, handle_dll_attribute }, { "dllexport", 0, 0, false, false, false, handle_dll_attribute }, *************** ix86_function_regparm (tree type, tree d *** 2483,2488 **** --- 2490,2499 ---- && decl_function_context (decl) && !DECL_NO_STATIC_CHAIN (decl)) local_regparm = 2; + /* If the function realigns its stackpointer, it + uses %ecx to reach unaligned arguments. */ + if (cfun->machine->force_align_arg_pointer) + local_regparm = 2; /* Each global register variable increases register preassure, so the more global reg vars there are, the smaller regparm optimization use, unless requested by the user explicitly. */ *************** pro_epilogue_adjust_stack (rtx dest, rtx *** 5132,5142 **** static rtx ix86_internal_arg_pointer (void) { ! if (FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN ! && DECL_NAME (current_function_decl) ! && MAIN_NAME_P (DECL_NAME (current_function_decl)) ! && DECL_FILE_SCOPE_P (current_function_decl)) ! { cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2); return copy_to_reg (cfun->machine->force_align_arg_pointer); } --- 5143,5167 ---- static rtx ix86_internal_arg_pointer (void) { ! bool has_force_align_arg_pointer = ! (0 != lookup_attribute (ix86_force_align_arg_pointer_string, ! TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))); ! if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN ! && DECL_NAME (current_function_decl) ! && MAIN_NAME_P (DECL_NAME (current_function_decl)) ! && DECL_FILE_SCOPE_P (current_function_decl)) ! || ix86_force_align_arg_pointer ! || has_force_align_arg_pointer) ! { ! if (!DECL_FILE_SCOPE_P (current_function_decl)) ! { ! if (ix86_force_align_arg_pointer) ! warning (0, "-mstackrealign ignored for nested functions"); ! if (has_force_align_arg_pointer) ! error ("%s not supported for nested functions", ! 
ix86_force_align_arg_pointer_string); ! return virtual_incoming_args_rtx; ! } cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2); return copy_to_reg (cfun->machine->force_align_arg_pointer); }