gcc/ 2009-09-26 H.J. Lu PR target/40838 * function.c (allocate_struct_function): Initialize forced_stack_alignment and forced_stack_mode. * function.h (function): Add forced_stack_alignment and forced_stack_mode. * tree-vect-analyze.c (vect_verify_datarefs_alignment): Update forced_stack_alignment and forced_stack_mode. * config/i386/i386.c (VALID_SSE_VECTOR_MODE): New. (ix86_update_stack_boundary): In 32bit, use STACK_BOUNDARY if any SSE variable are forced on stack. (ix86_minimum_alignment): In 32bit, update forced_stack_alignment and forced_stack_mode if any SSE variables are put on stack. gcc/testsuite/ 2009-09-26 H.J. Lu PR target/40838 * gcc.target/i386/incoming-6.c: New. * gcc.target/i386/incoming-7.c: Likewise. * gcc.target/i386/incoming-8.c: Likewise. * gcc.target/i386/incoming-9.c: Likewise. * gcc.target/i386/incoming-10.c: Likewise. * gcc.target/i386/incoming-11.c: Likewise. Index: gcc/testsuite/gcc.target/i386/incoming-11.c =================================================================== --- gcc/testsuite/gcc.target/i386/incoming-11.c (revision 0) +++ gcc/testsuite/gcc.target/i386/incoming-11.c (revision 0) @@ -0,0 +1,16 @@ +/* PR target/40838 */ +/* { dg-do compile { target { { ! *-*-darwin* } && ilp32 } } } */ +/* { dg-options "-w -fomit-frame-pointer -O3 -march=barcelona -mpreferred-stack-boundary=4" } */ + +void g(); + +int p[100]; +int q[100]; + +void f() +{ + int i; + for (i = 0; i < 100; i++) p[i] = 0; + g(); + for (i = 0; i < 100; i++) q[i] = 0; +} Index: gcc/testsuite/gcc.target/i386/incoming-7.c =================================================================== --- gcc/testsuite/gcc.target/i386/incoming-7.c (revision 0) +++ gcc/testsuite/gcc.target/i386/incoming-7.c (revision 0) @@ -0,0 +1,16 @@ +/* PR target/40838 */ +/* { dg-do compile { target { { ! *-*-darwin* } && ilp32 } } } */ +/* { dg-options "-w -O2 -msse2 -mpreferred-stack-boundary=4" } */ + +typedef int v4si __attribute__ ((vector_size (16))); + +extern v4si y(v4si, v4si, v4si, v4si, v4si); + +extern v4si s1, s2; + +v4si x(void) +{ + return y(s1, s2, s1, s2, s2); +} + +/* { dg-final { scan-assembler "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */ Index: gcc/testsuite/gcc.target/i386/incoming-9.c =================================================================== --- gcc/testsuite/gcc.target/i386/incoming-9.c (revision 0) +++ gcc/testsuite/gcc.target/i386/incoming-9.c (revision 0) @@ -0,0 +1,18 @@ +/* PR target/40838 */ +/* { dg-do compile { target { { ! *-*-darwin* } && ilp32 } } } */ +/* { dg-options "-w -O3 -mno-sse -mpreferred-stack-boundary=4" } */ + +float +foo (float f) +{ + float array[128]; + float x; + int i; + for (i = 0; i < sizeof(array) / sizeof(*array); i++) + array[i] = f; + for (i = 0; i < sizeof(array) / sizeof(*array); i++) + x += array[i]; + return x; +} + +/* { dg-final { scan-assembler-not "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */ Index: gcc/testsuite/gcc.target/i386/incoming-10.c =================================================================== --- gcc/testsuite/gcc.target/i386/incoming-10.c (revision 0) +++ gcc/testsuite/gcc.target/i386/incoming-10.c (revision 0) @@ -0,0 +1,19 @@ +/* PR target/40838 */ +/* { dg-do compile { target { { ! *-*-darwin* } && ilp32 } } } */ +/* { dg-options "-w -fomit-frame-pointer -O3 -march=barcelona -mpreferred-stack-boundary=4" } */ + +struct s { + int x[8]; +}; + +void g(struct s *); + +void f() +{ + int i; + struct s s; + for (i = 0; i < sizeof(s.x) / sizeof(*s.x); i++) s.x[i] = 0; + g(&s); +} + +/* { dg-final { scan-assembler "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */ Index: gcc/testsuite/gcc.target/i386/incoming-6.c =================================================================== --- gcc/testsuite/gcc.target/i386/incoming-6.c (revision 0) +++ gcc/testsuite/gcc.target/i386/incoming-6.c (revision 0) @@ -0,0 +1,17 @@ +/* PR target/40838 */ +/* { dg-do compile { target { { ! *-*-darwin* } && ilp32 } } } */ +/* { dg-options "-w -O2 -msse2 -mpreferred-stack-boundary=4" } */ + +typedef int v4si __attribute__ ((vector_size (16))); + +extern v4si y(v4si *s3); + +extern v4si s1, s2; + +v4si x(void) +{ + v4si s3 = s1 + s2; + return y(&s3); +} + +/* { dg-final { scan-assembler "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */ Index: gcc/testsuite/gcc.target/i386/incoming-8.c =================================================================== --- gcc/testsuite/gcc.target/i386/incoming-8.c (revision 0) +++ gcc/testsuite/gcc.target/i386/incoming-8.c (revision 0) @@ -0,0 +1,18 @@ +/* PR target/40838 */ +/* { dg-do compile { target { { ! *-*-darwin* } && ilp32 } } } */ +/* { dg-options "-w -O3 -msse2 -mpreferred-stack-boundary=4" } */ + +float +foo (float f) +{ + float array[128]; + float x; + int i; + for (i = 0; i < sizeof(array) / sizeof(*array); i++) + array[i] = f; + for (i = 0; i < sizeof(array) / sizeof(*array); i++) + x += array[i]; + return x; +} + +/* { dg-final { scan-assembler "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */ Index: gcc/function.c =================================================================== --- gcc/function.c (revision 151884) +++ gcc/function.c (working copy) @@ -4133,6 +4133,9 @@ allocate_struct_function (tree fndecl, b /* Assume all registers in stdarg functions need to be saved. */ cfun->va_list_gpr_size = VA_LIST_MAX_GPR_SIZE; cfun->va_list_fpr_size = VA_LIST_MAX_FPR_SIZE; + + cfun->forced_stack_alignment = 0; + cfun->forced_stack_mode = VOIDmode; } } Index: gcc/function.h =================================================================== --- gcc/function.h (revision 151884) +++ gcc/function.h (working copy) @@ -535,6 +535,12 @@ struct function GTY(()) per-function in order to allow IPA passes to introduce new functions. */ VEC(ipa_opt_pass,heap) * GTY((skip)) ipa_transforms_to_apply; + /* The largest alignment forced on the stack. */ + unsigned int forced_stack_alignment; + + /* The mode of the largest alignment forced on the stack. */ + enum machine_mode forced_stack_mode; + /* Collected bit flags. */ /* Number of units of general registers that need saving in stdarg Index: gcc/tree-vect-analyze.c =================================================================== --- gcc/tree-vect-analyze.c (revision 151884) +++ gcc/tree-vect-analyze.c (working copy) @@ -1630,6 +1630,8 @@ vect_verify_datarefs_alignment (loop_vec { gimple stmt = DR_STMT (dr); stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + enum machine_mode mode; + unsigned int align; /* For interleaving, only the alignment of the first access matters. */ if (STMT_VINFO_STRIDED_ACCESS (stmt_info) @@ -1650,6 +1652,15 @@ vect_verify_datarefs_alignment (loop_vec } return false; } + + mode = TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)); + align = GET_MODE_ALIGNMENT (mode); + if (cfun->forced_stack_alignment < align) + { + cfun->forced_stack_alignment = align; + cfun->forced_stack_mode = mode; + } + if (supportable_dr_alignment != dr_aligned && vect_print_dump_info (REPORT_ALIGNMENT)) fprintf (vect_dump, "Vectorizing an unaligned access."); Index: gcc/config/i386/i386.c =================================================================== --- gcc/config/i386/i386.c (revision 151884) +++ gcc/config/i386/i386.c (working copy) @@ -8034,16 +8034,31 @@ find_drap_reg (void) } } +#define VALID_SSE_VECTOR_MODE(MODE) \ + ((MODE) == V4SFmode || (MODE) == V4SImode || (MODE) == V2DFmode \ + || (MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DImode) + /* Update incoming stack boundary and estimated stack alignment. */ static void ix86_update_stack_boundary (void) { + /* Should we use STACK_BOUNDARY for incoming stack boundary? */ + unsigned int incoming_stack_boundary; + + /* In 32bit, use STACK_BOUNDARY for incoming stack boundary if any + SSE variables are put on stack. */ + if (!TARGET_64BIT + && VALID_SSE_VECTOR_MODE (cfun->forced_stack_mode)) + incoming_stack_boundary = STACK_BOUNDARY; + else + incoming_stack_boundary = ix86_default_incoming_stack_boundary; + /* Prefer the one specified at command line. */ ix86_incoming_stack_boundary = (ix86_user_incoming_stack_boundary ? ix86_user_incoming_stack_boundary - : ix86_default_incoming_stack_boundary); + : incoming_stack_boundary); /* Incoming stack alignment can be changed on individual functions via force_align_arg_pointer attribute. We use the smallest @@ -19575,7 +19590,7 @@ ix86_minimum_alignment (tree exp, enum m { tree type, decl; - if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64) + if (TARGET_64BIT) return align; if (exp && DECL_P (exp)) @@ -19589,6 +19604,25 @@ ix86_minimum_alignment (tree exp, enum m decl = NULL; } + /* In 32bit, update forced_stack_mode if any SSE variables are put + on stack. */ + if (cfun->forced_stack_alignment < 128) + { + if (VALID_SSE_VECTOR_MODE (mode)) + { + cfun->forced_stack_alignment = 128; + cfun->forced_stack_mode = mode; + } + else if ((type && VALID_SSE_VECTOR_MODE (TYPE_MODE (type)))) + { + cfun->forced_stack_alignment = 128; + cfun->forced_stack_mode = TYPE_MODE (type); + } + } + + if (align != 64 || ix86_preferred_stack_boundary >= 64) + return align; + /* Don't do dynamic stack realignment for long long objects with -mpreferred-stack-boundary=2. */ if ((mode == DImode || (type && TYPE_MODE (type) == DImode))