This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

[PATCH]: Force stack align changes (2/3)


Part 2 of 3, the i386.c changes.

The ChangeLog is on another computer, I apologize; I'll submit it as soon as
I'm on that computer again.

Most of this is pretty straightforward; it just outputs a
prologue/epilogue that forces stack alignment.

Note that the #if 0'd part is the part that outputs the aligned entry point,
which I can't get working with optimization on, no matter how hard I
try.
--Dan

*** /boot/home/write/egcs/gcc/config/i386/i386.c	Tue Jan  2 19:24:27 2001
--- i386.c	Wed Jan  3 14:21:17 2001
*************** static enum rtx_code unsigned_comparison
*** 390,395 ****
--- 401,407 ----
  static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
  static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
  							   rtx *, rtx *));
+ static char *build_aligned_name PARAMS ((const char *));
  static rtx gen_push PARAMS ((rtx));
  static int memory_address_length PARAMS ((rtx addr));
  static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
*************** static int ix86_split_to_parts PARAMS ((
*** 408,413 ****
--- 420,426 ----
  static int ix86_safe_length_prefix PARAMS ((rtx));
  static HOST_WIDE_INT ix86_compute_frame_size PARAMS((HOST_WIDE_INT,
  						     int *, int *, int *));
+ static HOST_WIDE_INT ix86_compute_frame_size_fsa PARAMS((HOST_WIDE_INT));
  static int ix86_nsaved_regs PARAMS((void));
  static void ix86_emit_save_regs PARAMS((void));
  static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int));
*************** static void ix86_emit_epilogue_esp_adjus
*** 415,421 ****
  static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
  static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
  static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
- 
  struct ix86_address
  {
    rtx base, index, disp;
--- 428,433 ----
*************** override_options ()
*** 632,638 ****
--- 644,654 ----
    /* It makes no sense to ask for just SSE builtins, so MMX is also turned
       on by -msse.  */
    if (TARGET_SSE)
+   {
      target_flags |= MASK_MMX;
+     ix86_preferred_stack_boundary = SSE_ALIGNMENT;
+   }
+ 
  }
  
  /* A C statement (sans semicolon) to choose the order in which to
*************** function_arg_advance (cum, mode, type, n
*** 917,933 ****
      fprintf (stderr,
  	     "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
  	     words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
  
!   cum->words += words;
!   cum->nregs -= words;
!   cum->regno += words;
! 
!   if (cum->nregs <= 0)
!     {
!       cum->nregs = 0;
!       cum->regno = 0;
!     }
! 
    return;
  }
  
--- 933,961 ----
      fprintf (stderr,
  	     "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
  	     words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
+   if (TARGET_SSE && mode == TImode)
+   {
+ 	  cum->sse_words += words;
+ 	  cum->sse_nregs -= 1;
+ 	  cum->sse_regno += 1;
+ 	  if (cum->sse_nregs <= 0)
+ 	  {
+ 		  cum->sse_nregs = 0;
+ 		  cum->sse_regno = 0;
+ 	  }
+   }
+   else         
+   {
+ 	  cum->words += words;
+ 	  cum->nregs -= words;
+ 	  cum->regno += words;
  
! 	  if (cum->nregs <= 0)
! 	  {
! 		  cum->nregs = 0;
! 		  cum->regno = 0;
! 	  }
!   }
    return;
  }
  
*************** function_arg (cum, mode, type, named)
*** 970,975 ****
--- 998,1007 ----
        if (words <= cum->nregs)
  	ret = gen_rtx_REG (mode, cum->regno);
        break;
+     case TImode:
+       if (cum->sse_nregs)
+         ret = gen_rtx_REG (mode, cum->sse_regno);
+       break;    
      }
  
    if (TARGET_DEBUG_ARG)
*************** ix86_nsaved_regs ()
*** 1779,1784 ****
--- 1811,1823 ----
    return nregs;
  }
  
+ HOST_WIDE_INT
+ ix86_initial_elimination_offset_fsa ()
+ {
+   HOST_WIDE_INT tsize = ix86_compute_frame_size_fsa (get_frame_size ());
+   return tsize;
+ }
+ 
  /* Return the offset between two registers, one to be eliminated, and the other
     its replacement, at the start of a routine.  */
  
*************** ix86_emit_save_regs ()
*** 1925,1944 ****
  
    for (regno = limit - 1; regno >= 0; regno--)
      if ((regs_ever_live[regno] && !call_used_regs[regno])
! 	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
        {
  	insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
  	RTX_FRAME_RELATED_P (insn) = 1;
        }
  }
  
  /* Expand the prologue into a bunch of separate insns.  */
  
  void
  ix86_expand_prologue ()
  {
!   HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), (int *) 0, (int *) 0,
! 						 (int *) 0);
    rtx insn;
    int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
  				  || current_function_uses_const_pool);
--- 1964,2186 ----
  
    for (regno = limit - 1; regno >= 0; regno--)
      if ((regs_ever_live[regno] && !call_used_regs[regno])
! 	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used)
! 	)
!       {
! 	insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
! 	RTX_FRAME_RELATED_P (insn) = 1;
!       }
! }
! 
! /* Expand the prologue into a bunch of separate insns.  P3-specific.  */
! void
! ix86_expand_prologue_fsa ()
! {
!  register int regno;
!   int limit;
!   int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
! 				  || current_function_uses_const_pool);
!   HOST_WIDE_INT tsize = ix86_compute_frame_size_fsa (get_frame_size ());
!   rtx insn;
!   int argp_offset = 4;
!   rtx tmp_ap = gen_rtx_REG (SImode, ARG_POINTER_REGNUM);
!   rtx common_label;
!   rtx aligned_label;
!   rtx formatted_name_rtl;
!   char *formatted_name, *aligned_name;
!   rtx dwarf2_nop;
! 
!   /* Since we have 2 entry points, we don't want the dwarf unwinder to
!      generate an entry for both. Only one of the 2 entry points is
!      actually executed, and they are functionally identical as far as
!      the unwinder is concerned. We will lie to it about the first
!      prologue, and give it correct information for the second one.  */
! 
!   dwarf2_nop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, stack_pointer_rtx);
! 
!   limit = STACK_POINTER_REGNUM;
!   for (regno = limit - 1; regno >= 0; regno--)
!     if ((regs_ever_live[regno] && ! call_used_regs[regno])
! 	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used)
! 	|| (regno == ARG_POINTER_REGNUM))
        {
  	insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
  	RTX_FRAME_RELATED_P (insn) = 1;
+ 	REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
+ 					      dwarf2_nop,
+ 					      REG_NOTES (insn));
+ 
+ 	argp_offset += 4;
+       }
+ 
+    /*  Generate code for two separate entry points.  The default entry
+        will generate code to align the stack to a 128-bit boundary.  The
+        second entry point bypasses the alignment code and will be
+        called for those cases when we know that the stack is already
+        aligned.
+        
+       entry:
+          push ebp                ; save off fp
+          push esi                ; esi will be used for ap
+          push xxx                ; any other required saves
+ 	 movl esp,esi            ; save arg pointer
+ 	 subl $4,esp             ; make room for saved sp ???
+ 	 andl $-16,esp           ; align stack pointer
+          jmp common
+ 
+       entry.aligned:
+ 	 push ebp                ; save off fp
+          push esi                ; esi will be used for ap
+          push xxx                ; any other required saves
+          movl esp,esi            ; save arg pointer
+          subl $4, esp            ; make room for saved sp ???
+ 
+       common:
+ 
+ 	 movl esi, (esp)         
+ 	 [ movl esp, ebp ]
+ 	 addl xxx,esi
+    */
+ 
+   insn = emit_move_insn (tmp_ap, stack_pointer_rtx);
+   REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
+ 					dwarf2_nop,
+ 					REG_NOTES (insn));
+ 
+   insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ 	                        GEN_INT (-4)));
+   RTX_FRAME_RELATED_P (insn) = 1;
+   REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
+ 					dwarf2_nop,
+ 					REG_NOTES (insn));
+ 
+   insn = emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
+                                 GEN_INT (-16)));
+   RTX_FRAME_RELATED_P (insn) = 1;
+   REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
+ 					dwarf2_nop,
+ 					REG_NOTES (insn));
+ /* I can't make the aligned name work right. When I attempt to generate labels, which is the way to do this, we bomb out in the scheduler trying to schedule the code_labels.  */
+ #if 0
+   common_label = gen_label_rtx ();
+   LABEL_NUSES (common_label) = 1;
+   insn = emit_jump_insn (gen_jump (common_label));
+   JUMP_LABEL (insn) = common_label;
+ 
+   formatted_name_rtl = DECL_RTL (current_function_decl);
+   if (GET_CODE (formatted_name_rtl) == MEM)
+     formatted_name_rtl = XEXP (formatted_name_rtl, 0);
+   if (GET_CODE (formatted_name_rtl) != SYMBOL_REF)
+     abort();
+ 
+   STRIP_NAME_ENCODING (formatted_name, XSTR (formatted_name_rtl, 0));
+   aligned_label = gen_label_rtx ();
+   aligned_name = build_aligned_name (formatted_name);
+ 
+   LABEL_ALTERNATE_NAME (aligned_label) = aligned_name;
+   LABEL_NUSES (aligned_label) = 1;
+   emit_label (aligned_label);
+ 
+   /* Callee-saved registers.  */
+   for (regno = limit - 1; regno >= 0; regno--)
+     if ((regs_ever_live[regno] && ! call_used_regs[regno])
+         || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used)
+         || (regno == ARG_POINTER_REGNUM))
+       {
+         insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
+         RTX_FRAME_RELATED_P (insn) = 1;
        }
+ 
+   insn = emit_move_insn (tmp_ap, stack_pointer_rtx);
+   RTX_FRAME_RELATED_P (insn) = 1;
+ 
+   insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ 	                        GEN_INT (-(PREFERRED_STACK_BOUNDARY-argp_offset))));
+   /* We need to hide this stack adjustment since it's not actually related
+      to unwinding, and we don't want dwarf2_stack_adjustment to get confused.  */
+   RTX_FRAME_RELATED_P (insn) = 1;
+   REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
+ 					gen_rtx_SET (VOIDmode, tmp_ap, tmp_ap),
+ 					REG_NOTES (insn));
+ 
+   emit_label (common_label);
+ #endif
+   insn = emit_move_insn (gen_rtx_MEM (Pmode, stack_pointer_rtx), tmp_ap);
+   RTX_FRAME_RELATED_P (insn) = 1;
+   if (regs_ever_live[REGNO (tmp_ap)])
+     {
+       insn = emit_insn (gen_addsi3 (tmp_ap, tmp_ap, GEN_INT (argp_offset)));
+       emit_insn (gen_rtx_USE (VOIDmode, tmp_ap));
+     }
+ 
+   if (frame_pointer_needed)
+     {
+       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
+       RTX_FRAME_RELATED_P (insn) = 1;
+     }
+ 
+   if (tsize == 0)
+     ;
+   else if (! TARGET_STACK_PROBE || tsize < CHECK_STACK_LIMIT)
+     {
+       if (frame_pointer_needed)
+ 	{
+ 	  insn = emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
+ 						         stack_pointer_rtx,
+ 						         GEN_INT (-tsize),
+ 						         hard_frame_pointer_rtx));
+ 	  REG_NOTES (insn)
+ 	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
+ 				 gen_rtx_SET (VOIDmode,
+ 					      stack_pointer_rtx,
+ 					      gen_rtx_PLUS (Pmode,
+ 							    stack_pointer_rtx,
+ 							    GEN_INT (-tsize))),
+ 				 REG_NOTES (insn));
+ 	}
+       else
+         insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ 				      GEN_INT (-tsize)));
+       RTX_FRAME_RELATED_P (insn) = 1;
+     }
+   else
+     {
+       /* ??? Is this only valid for Win32?  */
+ 
+       rtx arg0, sym;
+ 
+       arg0 = gen_rtx_REG (SImode, 0);
+       emit_move_insn (arg0, GEN_INT (tsize));
+ 
+       sym = gen_rtx_MEM (FUNCTION_MODE,
+ 			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
+       insn = emit_call_insn (gen_call (sym, const0_rtx));
+ 
+       CALL_INSN_FUNCTION_USAGE (insn)
+ 	= gen_rtx_EXPR_LIST (VOIDmode, arg0, CALL_INSN_FUNCTION_USAGE (insn));
+     }
+ 
+ #ifdef SUBTARGET_PROLOGUE
+   SUBTARGET_PROLOGUE;
+ #endif  
+ 
+   if (pic_reg_used)
+     load_pic_register ();
+ 
+   /* If we are profiling, make sure no instructions are scheduled before
+      the call to mcount.  However, if -fpic, the above call will have
+      done that.  */
+   if ((profile_flag || profile_block_flag) && ! pic_reg_used)
+     emit_insn (gen_blockage ());
  }
  
+ 
  /* Expand the prologue into a bunch of separate insns.  */
  
  void
  ix86_expand_prologue ()
  {
!   HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), (int *) 0, (int *) 0, (int *) 0);
    rtx insn;
    int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
  				  || current_function_uses_const_pool);
*************** ix86_expand_prologue ()
*** 2002,2007 ****
--- 2244,2261 ----
      emit_insn (gen_blockage ());
  }
  
+ static char *
+ build_aligned_name (name)
+        const char *name;
+ {
+ /* FIXME: use ggc_alloc_string properly */
+   char *aligned_name = xmalloc(strlen(name) + 9);
+   
+   strcpy (aligned_name, name);
+   strcat (aligned_name, ".aligned");
+   return aligned_name;
+ }
+ 
  /* Emit code to add TSIZE to esp value.  Use POP instruction when
     profitable.  */
  
*************** ix86_emit_restore_regs_using_mov (pointe
*** 2045,2050 ****
--- 2299,2366 ----
  	offset += 4;
        }
  }
+ /* Restore function stack, frame, and registers.  PIII-specific.  */
+ 
+ void
+ ix86_expand_epilogue_fsa ( emit_return )
+       int emit_return;
+ {
+   register int regno;
+   register int limit;
+   int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
+ 				  || current_function_uses_const_pool);
+   HOST_WIDE_INT tsize = ix86_compute_frame_size_fsa (get_frame_size ());
+   rtx addr;
+ 
+   if (frame_pointer_needed)
+     addr = hard_frame_pointer_rtx;
+   else
+     addr = plus_constant (stack_pointer_rtx, tsize);
+   emit_move_insn (stack_pointer_rtx, gen_rtx_MEM (Pmode, addr));
+ 
+   /* If we're only restoring one register and sp is not valid then
+      use a move instruction to restore the register, since it's
+      less work than reloading sp and popping the register.  Otherwise,
+      restore sp (if necessary) and pop the registers. */
+ 
+   for (regno = 0; regno < STACK_POINTER_REGNUM; regno++)
+     if ((regs_ever_live[regno] && ! call_used_regs[regno])
+ 	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used)
+ 	|| (regno == ARG_POINTER_REGNUM))
+       {
+ 	emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
+       }
+ 
+ #ifdef FUNCTION_BLOCK_PROFILER_EXIT
+   if (profile_block_flag == 2)
+     {
+       FUNCTION_BLOCK_PROFILER_EXIT;
+     }
+ #endif
+   if (! emit_return)
+     return;
+   if (current_function_pops_args && current_function_args_size)
+     {
+       rtx popc = GEN_INT (current_function_pops_args);
+ 
+       /* i386 can only pop 32K bytes (maybe 64K?  Is it signed?).  If
+ 	 asked to pop more, pop return address, do explicit add, and jump
+ 	 indirectly to the caller. */
+ 
+       if (current_function_pops_args >= 65536)
+ 	{
+ 	  rtx ecx = gen_rtx_REG (SImode, 2);
+ 
+ 	  emit_insn (gen_popsi1 (ecx));
+ 	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
+ 	  emit_jump_insn (gen_return_indirect_internal (ecx));
+ 	}
+       else
+ 	emit_jump_insn (gen_return_pop_internal (popc));
+     }
+   else
+     emit_jump_insn (gen_return_internal ());
+ }
  
  /* Restore function stack, frame, and registers.  */
  
*************** legitimize_address (x, oldx, mode)
*** 2920,2925 ****
--- 3236,3295 ----
    return x;
  }
  
+ HOST_WIDE_INT
+ ix86_compute_frame_size_fsa (size)
+      HOST_WIDE_INT size;
+ {
+ #ifdef PREFERRED_STACK_BOUNDARY
+   int preferred_alignment = PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT;
+ #endif
+   
+   size += preferred_alignment - 1;
+   size &= ~(preferred_alignment - 1);
+ 
+   return size;
+ 
+ }
+ void
+ ix86_encode_section_info (decl)
+       tree decl;
+ {
+   if (flag_pic)
+     {
+       rtx rtl = (TREE_CODE_CLASS (TREE_CODE (decl)) != 'd'
+                  ? TREE_CST_RTL (decl) : DECL_RTL (decl));
+ 
+       if (GET_CODE (rtl) == MEM)
+ 	{
+ 	  if (TARGET_DEBUG_ADDR
+ 	      && TREE_CODE_CLASS (TREE_CODE (decl)) == 'd')
+ 	    {
+ 	      fprintf (stderr, "Encode %s, public = %d\n",
+ 		       IDENTIFIER_POINTER (DECL_NAME (decl)),
+ 		       TREE_PUBLIC (decl));
+ 	    }
+ 
+ 	  SYMBOL_REF_FLAG (XEXP (rtl, 0))
+ 	    = (TREE_CODE_CLASS (TREE_CODE (decl)) != 'd'
+ 	       || ! TREE_PUBLIC (decl));
+ 	}
+     }
+ 
+   /* Mark same file static function decls.  */
+   if (TARGET_SSE
+       && TREE_CODE (decl) == FUNCTION_DECL
+       && (TREE_ASM_WRITTEN (decl) || ! TREE_PUBLIC (decl))
+       /* && !DECL_INLINE (decl) */
+       && !DECL_WEAK (decl))
+     {
+       rtx sym_ref = XEXP (DECL_RTL (decl), 0);
+       char *str = permalloc (2 + strlen (XSTR (sym_ref, 0)));
+       strcpy (str, "+");
+       strcat (str, XSTR (sym_ref, 0));
+       XSTR (sym_ref, 0) = str;
+     }
+ }
+ 
  /* Print an integer constant expression in assembler syntax.  Addition
     and subtraction are the only arithmetic that may appear in these
     expressions.  FILE is the stdio stream to write to, X is the rtx, and
*************** output_pic_addr_const (file, x, code)
*** 2932,2937 ****
--- 3302,3308 ----
       int code;
  {
    char buf[256];
+   char *real_name;
  
    switch (GET_CODE (x))
      {
*************** output_pic_addr_const (file, x, code)
*** 2943,2949 ****
        break;
  
      case SYMBOL_REF:
!       assemble_name (file, XSTR (x, 0));
        if (code == 'P' && ! SYMBOL_REF_FLAG (x))
  	fputs ("@PLT", file);
        break;
--- 3314,3333 ----
        break;
  
      case SYMBOL_REF:
!       STRIP_NAME_ENCODING (real_name, XSTR (x,0));
!       if (code == 'P' 
!           && TARGET_FORCE_STACK_ALIGNMENT
!           && SAME_FILE_NAME_P (XSTR (x,0)))
!         {
! 	  char *aligned_name;
! 	  tree id = maybe_get_identifier (real_name);
! 	  BUILD_ALIGNED_NAME (real_name, aligned_name);
! 	  assemble_name (file, aligned_name);
! 	  if (id)
! 	    TREE_SYMBOL_REFERENCED (id) = 1;
!         }
!       else
! 	  assemble_name (file, XSTR (x, 0));
        if (code == 'P' && ! SYMBOL_REF_FLAG (x))
  	fputs ("@PLT", file);
        break;
*************** print_reg (x, code, file)
*** 3169,3175 ****
       int code;
       FILE *file;
  {
!   if (REGNO (x) == ARG_POINTER_REGNUM
        || REGNO (x) == FRAME_POINTER_REGNUM
        || REGNO (x) == FLAGS_REG
        || REGNO (x) == FPSR_REG)
--- 3553,3559 ----
       int code;
       FILE *file;
  {
!   if (REGNO (x) == NORMAL_ARG_POINTER_REGNUM
        || REGNO (x) == FRAME_POINTER_REGNUM
        || REGNO (x) == FLAGS_REG
        || REGNO (x) == FPSR_REG)
*************** print_operand (file, x, code)
*** 3499,3504 ****
--- 3883,3906 ----
  	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
        else if (flag_pic)
  	output_pic_addr_const (file, x, code);
+       else if (TARGET_FORCE_STACK_ALIGNMENT
+                && GET_CODE (x) == SYMBOL_REF
+                && SAME_FILE_NAME_P (XSTR (x, 0)))
+         { 
+ 	  tree id;
+ 	  char *aligned_name, *real_name;
+ 	  STRIP_NAME_ENCODING (real_name, XSTR (x,0));
+           if (code == 'P')
+             {
+ 	      BUILD_ALIGNED_NAME (real_name, aligned_name);
+ 	      assemble_name (file, aligned_name);
+ 	      id = maybe_get_identifier (real_name);
+ 	      if (id)
+ 	    	TREE_SYMBOL_REFERENCED (id) = 1;
+             }
+           else
+ 	    assemble_name (file, real_name);
+         }
        else
  	output_addr_const (file, x);
      }
*************** print_operand_address (file, addr)
*** 3539,3544 ****
--- 3941,3960 ----
  	}
        else if (flag_pic)
  	output_pic_addr_const (file, addr, 0);
+       else if (TARGET_FORCE_STACK_ALIGNMENT
+                && GET_CODE (addr) == SYMBOL_REF
+                && SAME_FILE_NAME_P (XSTR (addr, 0)))
+         {
+ 	  char *aligned_name, *real_name; 
+ 	  tree id;
+ 	  STRIP_NAME_ENCODING (real_name, XSTR (addr ,0));
+ 	  BUILD_ALIGNED_NAME (real_name, aligned_name);
+ 	  assemble_name (file, aligned_name);
+ 	  id = maybe_get_identifier (real_name);
+ 	  if (id)
+ 	    TREE_SYMBOL_REFERENCED (id) = 1;
+         }
+ 
        else
  	output_addr_const (file, addr);
      }
*************** ix86_init_builtins ()
*** 7284,7290 ****
  {
    struct builtin_description * d;
    int i;
!   tree endlink = tree_cons (NULL_TREE, void_type_node, NULL_TREE);
  
    tree pchar_type_node = build_pointer_type (char_type_node);
    tree pfloat_type_node = build_pointer_type (float_type_node);
--- 7700,7706 ----
  {
    struct builtin_description * d;
    int i;
!   tree endlink = build_tree_list(NULL_TREE, void_type_node);
  
    tree pchar_type_node = build_pointer_type (char_type_node);
    tree pfloat_type_node = build_pointer_type (float_type_node);
*************** ix86_init_builtins ()
*** 7323,7328 ****
--- 7739,7747 ----
      = build_function_type (integer_type_node,
  			   tree_cons (NULL_TREE, V2SI_type_node,
  				      endlink));
+   tree v2si_ftype_void
+     = build_function_type (V2SI_type_node,endlink);
+ 
    tree v2si_ftype_int
      = build_function_type (V2SI_type_node,
  			   tree_cons (NULL_TREE, integer_type_node,
*************** ix86_init_builtins ()
*** 7643,7650 ****
--- 8064,8073 ----
    def_builtin ("__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
    def_builtin ("__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
    def_builtin ("__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
+   def_builtin ("__builtin_ia32_loadps",  v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS);
    def_builtin ("__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
    def_builtin ("__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
+   def_builtin ("__builtin_ia32_storeps",  void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS);
    def_builtin ("__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
    def_builtin ("__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
  }
*************** ix86_expand_builtin (exp, target, subtar
*** 8257,8263 ****
        target = gen_reg_rtx (TImode);
        emit_insn (gen_sse_clrti (target));
        return target;
! 
      case IX86_BUILTIN_LOADRPS:
        target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
  					 gen_reg_rtx (V4SFmode), 1);
--- 8680,8690 ----
        target = gen_reg_rtx (TImode);
        emit_insn (gen_sse_clrti (target));
        return target;
!     
!     case IX86_BUILTIN_LOADPS:
!       return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
! 		      			 gen_reg_rtx (V4SFmode), 1);
!       
      case IX86_BUILTIN_LOADRPS:
        target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
  					 gen_reg_rtx (V4SFmode), 1);
*************** ix86_expand_builtin (exp, target, subtar
*** 8269,8275 ****
  					 gen_reg_rtx (V4SFmode), 1);
        emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
        return target;
! 
      case IX86_BUILTIN_STOREPS1:
        return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
      case IX86_BUILTIN_STORERPS:
--- 8696,8703 ----
  					 gen_reg_rtx (V4SFmode), 1);
        emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
        return target;
!     case IX86_BUILTIN_STOREPS:
!       return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
      case IX86_BUILTIN_STOREPS1:
        return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
      case IX86_BUILTIN_STORERPS:


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]