PR 28490 question & patch (IA64 bug & performance issue)


While working on PR 28490 I did some timings comparing what would happen
if we allowed any (symbol+offset) usage in addresses and what would
happen if we allowed no (symbol+offset) usage.  Currently we allow it
for large offsets but force small offsets to be added explicitly.
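
To make the distinction concrete, here is a small illustrative example
(not taken from the PR; the array name and offset are made up):

	extern char buf[];
	char *f (void) { return &buf[40]; }

	/* With (symbol+offset) allowed, the address above can be expressed
	   directly as the relocatable constant buf+40; with it disallowed,
	   the compiler loads the address of buf and adds 40 at run time.  */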

Performance-wise, allowing no offsets seems to be the better option.

Here are the SPEC 2000 timings I did and the ratio of new code to
existing code.  A ratio of less than 1 is a speedup with the new code;
greater than 1 is a slowdown.  The first column of numbers is where I
allowed only symbols, and the second column is where I allowed symbols
plus any constant.  This was done on HP-UX, but in LP64 mode, so it
should map well to IA64 Linux.

As you can see, allowing only symbols with no offset seems to be the
better option.


                Symbol only	 Symbol + any-const

164.gzip        0.993220339      0.996125908
175.vpr         0.9915305505     0.9951603146
176.gcc         1.0019723866     1
181.mcf         1.0143942423     1.043582567
186.crafty      1.0111492281     1.0197255575
197.parser      0.995258791      0.9988146977
252.eon         1	         1.0032432432
253.perlbmk     0.9952090592     0.9960801394
254.gap         1.0074666667     1.0037333333
255.vortex      0.9911286348     0.9955643174
256.bzip2       0.9836984208     1
300.twolf       0.9793233083     0.9818295739

168.wupwise     0.999685831      0.9987433239
171.swim        0.9997538764     1.002215112
172.mgrid       0.9996411912     1.0023322569
173.applu       0.9993701449     1.0016796137
177.mesa        0.9956873315     1.0021563342
178.galgel      0.9877575396     0.9871603464
179.art         0.986975398      1.005788712
183.equake      1.0183916336     1.0180310133
187.facerec     0.9872272485     0.9950771687
188.ammp        0.9982650937     1.0006939625
189.lucas       0.9986836332     1.0048266784
191.fma3d       1.001182832      1.00042244
200.sixtrack    1.0009302326     1.0011627907
301.apsi        1.0003770028     1.0001885014

I have attached the patch that I used for the 'symbol only' case, but
I have a question about it.  If you apply my patch and then compile the
following test:

	extern char object[];
	char *r_1 (void) { return &object[1]; }
	char *R_1 (void) { return &object[-1]; }

I find the difference in code between r_1 and R_1 interesting: for r_1
we create a variable to hold the value of &object[1] and load that,
while for R_1 we just load the address of object and subtract 1.  I was
wondering if there is a way to avoid creating the .LC0 variable that
r_1 uses; I would like to find out if there is a performance advantage
to doing that.  Looking at how the two functions differ when being
compiled, it looks like they diverge in the final if statement in
get_inner_reference.  Presumably the second one doesn't set up the
temporary variable because it doesn't know how to handle object[-1] due
to overflow/underflow concerns.
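
Roughly speaking, the two functions come out shaped like the following
C-level sketch (illustrative only, not actual compiler output;
LC0_equivalent stands in for the .LC0 entry the compiler creates):

	extern char object[];

	/* r_1: the address &object[1] is kept in a static data area (the
	   .LC0 entry) and the function loads it from memory.  */
	static char *const LC0_equivalent = &object[1];
	char *r_1_shape (void) { return LC0_equivalent; }

	/* R_1: the address of object is loaded directly and the offset is
	   applied with a run-time subtract.  */
	char *R_1_shape (void) { return object - 1; }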

So my question is: is there something I can set to stop r_1 from using
a temporary data area and instead use a run-time addition to get the
address of object[1]?  get_inner_reference does not seem to check any
target flags when making this decision.

Attached is the patch I am currently playing with.

Steve Ellcey
sje@cup.hp.com


2006-09-05  Steve Ellcey  <sje@cup.hp.com>

	* config/ia64/predicates.md (symbolic_operand): Remove const handling.
	(sdata_symbolic_operand): Ditto.
	(small_addr_symbolic_operand): Ditto.
	(got_symbolic_operand): Ditto.
	(tls_symbolic_operand): Ditto.
	(ie_tls_symbolic_operand): Ditto.
	(move_operand): Ditto.
	* config/ia64/ia64.c (tls_symbolic_operand_type): Ditto.
	(ia64_legitimate_constant_p): Ditto.
	(ia64_expand_load_address): Ditto.
	(ia64_expand_tls_address): Ditto.
	(ia64_expand_move): Ditto.


Index: config/ia64/predicates.md
===================================================================
--- config/ia64/predicates.md	(revision 116011)
+++ config/ia64/predicates.md	(working copy)
@@ -27,7 +27,7 @@ (define_predicate "call_operand"
 ;; For roughly the same reasons that pmode_register_operand exists, this
 ;; predicate ignores its mode argument.
 (define_special_predicate "symbolic_operand" 
-   (match_code "symbol_ref,const,label_ref"))
+   (match_code "symbol_ref,label_ref"))
 
 ;; True if OP is a SYMBOL_REF which refers to a function.
 (define_predicate "function_operand"
@@ -36,31 +36,18 @@ (define_predicate "function_operand"
 
 ;; True if OP refers to a symbol in the sdata section.
 (define_predicate "sdata_symbolic_operand" 
-  (match_code "symbol_ref,const")
+  (match_code "symbol_ref")
 {
   HOST_WIDE_INT offset = 0, size = 0;
 
-  switch (GET_CODE (op))
+  if (CONSTANT_POOL_ADDRESS_P (op))
     {
-    case CONST:
-      op = XEXP (op, 0);
-      if (GET_CODE (op) != PLUS
-	  || GET_CODE (XEXP (op, 0)) != SYMBOL_REF
-	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
+      size = GET_MODE_SIZE (get_pool_mode (op));
+      if (size > ia64_section_threshold)
 	return false;
-      offset = INTVAL (XEXP (op, 1));
-      op = XEXP (op, 0);
-      /* FALLTHRU */
-
-    case SYMBOL_REF:
-      if (CONSTANT_POOL_ADDRESS_P (op))
-	{
-	  size = GET_MODE_SIZE (get_pool_mode (op));
-	  if (size > ia64_section_threshold)
-	    return false;
-	}
-      else
-	{
+    }
+  else
+    {
 	  tree t;
 
 	  if (!SYMBOL_REF_LOCAL_P (op) || !SYMBOL_REF_SMALL_P (op))
@@ -79,41 +66,22 @@ (define_predicate "sdata_symbolic_operan
 	      if (size < 0)
 		size = 0;
 	    }
-	}
+    }
 
       /* Deny the stupid user trick of addressing outside the object.  Such
-	 things quickly result in GPREL22 relocation overflows.  Of course,
+         things quickly result in GPREL22 relocation overflows.  Of course,
 	 they're also highly undefined.  From a pure pedant's point of view
 	 they deserve a slap on the wrist (such as provided by a relocation
 	 overflow), but that just leads to bugzilla noise.  */
-      return (offset >= 0 && offset <= size);
-
-    default:
-      gcc_unreachable ();
-    }
+    return (offset >= 0 && offset <= size);
 })
 
 ;; True if OP refers to a symbol in the small address area.
 (define_predicate "small_addr_symbolic_operand" 
-  (match_code "symbol_ref,const")
+  (match_code "symbol_ref")
 {
-  switch (GET_CODE (op))
-    {
-    case CONST:
-      op = XEXP (op, 0);
-      if (GET_CODE (op) != PLUS
-	  || GET_CODE (XEXP (op, 0)) != SYMBOL_REF
-	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
-	return false;
-      op = XEXP (op, 0);
-      /* FALLTHRU */
-
-    case SYMBOL_REF:
-      return SYMBOL_REF_SMALL_ADDR_P (op);
-
-    default:
-      gcc_unreachable ();
-    }
+  return SYMBOL_REF_SMALL_ADDR_P (op);
 })
 
 ;; True if OP refers to a symbol with which we may use any offset.
@@ -142,42 +110,18 @@ (define_predicate "aligned_offset_symbol
 (define_predicate "got_symbolic_operand" 
   (match_operand 0 "symbolic_operand" "")
 {
-  HOST_WIDE_INT addend = 0;
-
   switch (GET_CODE (op))
     {
     case LABEL_REF:
       return true;
 
-    case CONST:
-      /* Accept only (plus (symbol_ref) (const_int)).  */
-      op = XEXP (op, 0);
-      if (GET_CODE (op) != PLUS
-	  || GET_CODE (XEXP (op, 0)) != SYMBOL_REF
-          || GET_CODE (XEXP (op, 1)) != CONST_INT)
-        return false;
-
-      addend = INTVAL (XEXP (op, 1));
-      op = XEXP (op, 0);
-      /* FALLTHRU */
-
     case SYMBOL_REF:
       /* These symbols shouldn't be used with got loads.  */
       if (SYMBOL_REF_SMALL_ADDR_P (op))
 	return false;
       if (SYMBOL_REF_TLS_MODEL (op) != 0)
 	return false;
-
-      if (any_offset_symbol_operand (op, mode))
-	return true;
-
-      /* The low 14 bits of the constant have been forced to zero
-	 so that we do not use up so many GOT entries.  Prevent cse
-	 from undoing this.  */
-      if (aligned_offset_symbol_operand (op, mode))
-	return (addend & 0x3fff) == 0;
-
-      return addend == 0;
+      return true;
 
     default:
       gcc_unreachable ();
@@ -186,40 +130,9 @@ (define_predicate "got_symbolic_operand"
 
 ;; Return true if OP is a valid thread local storage symbolic operand.
 (define_predicate "tls_symbolic_operand"
-  (match_code "symbol_ref,const")
+  (match_code "symbol_ref")
 {
-  switch (GET_CODE (op))
-    {
-    case SYMBOL_REF:
-      return SYMBOL_REF_TLS_MODEL (op) != 0;
-
-    case CONST:
-      op = XEXP (op, 0);
-      if (GET_CODE (op) != PLUS
-	  || GET_CODE (XEXP (op, 0)) != SYMBOL_REF
-	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
-	return false;
-
-      /* We only allow certain offsets for certain tls models.  */
-      switch (SYMBOL_REF_TLS_MODEL (XEXP (op, 0)))
-	{
-	case TLS_MODEL_GLOBAL_DYNAMIC:
-	case TLS_MODEL_LOCAL_DYNAMIC:
-	  return false;
-
-	case TLS_MODEL_INITIAL_EXEC:
-	  return (INTVAL (XEXP (op, 1)) & 0x3fff) == 0;
-
-	case TLS_MODEL_LOCAL_EXEC:
-	  return true;
-
-	default:
-	  return false;
-	}
-
-    default:
-      gcc_unreachable ();
-    }
+  return SYMBOL_REF_TLS_MODEL (op) != 0;
 })
 
 ;; Return true if OP is a local-dynamic thread local storage symbolic operand.
@@ -229,49 +142,16 @@ (define_predicate "ld_tls_symbolic_opera
 
 ;; Return true if OP is an initial-exec thread local storage symbolic operand.
 (define_predicate "ie_tls_symbolic_operand"
-  (match_code "symbol_ref,const")
+  (match_code "symbol_ref")
 {
-  switch (GET_CODE (op))
-    {
-    case CONST:
-      op = XEXP (op, 0);
-      if (GET_CODE (op) != PLUS
-	  || GET_CODE (XEXP (op, 0)) != SYMBOL_REF
-	  || GET_CODE (XEXP (op, 1)) != CONST_INT
-	  || (INTVAL (XEXP (op, 1)) & 0x3fff) != 0)
-	return false;
-      op = XEXP (op, 0);
-      /* FALLTHRU */
-
-    case SYMBOL_REF:
-      return SYMBOL_REF_TLS_MODEL (op) == TLS_MODEL_INITIAL_EXEC;
-
-    default:
-      gcc_unreachable ();
-    }
+  return SYMBOL_REF_TLS_MODEL (op) == TLS_MODEL_INITIAL_EXEC;
 })
 
 ;; Return true if OP is a local-exec thread local storage symbolic operand.
 (define_predicate "le_tls_symbolic_operand"
-  (match_code "symbol_ref,const")
+  (match_code "symbol_ref")
 {
-  switch (GET_CODE (op))
-    {
-    case CONST:
-      op = XEXP (op, 0);
-      if (GET_CODE (op) != PLUS
-          || GET_CODE (XEXP (op, 0)) != SYMBOL_REF
-          || GET_CODE (XEXP (op, 1)) != CONST_INT)
-        return false;
-      op = XEXP (op, 0);
-      /* FALLTHRU */
-
-    case SYMBOL_REF:
-      return SYMBOL_REF_TLS_MODEL (op) == TLS_MODEL_LOCAL_EXEC;
-
-    default:
-      gcc_unreachable ();
-    }
+  return SYMBOL_REF_TLS_MODEL (op) == TLS_MODEL_LOCAL_EXEC;
 })
 
 ;; Like nonimmediate_operand, but don't allow MEMs that try to use a
@@ -289,49 +169,7 @@ (define_predicate "not_postinc_memory_op
 
 ;; True if OP is a general operand, with some restrictions on symbols.
 (define_predicate "move_operand"
-  (match_operand 0 "general_operand")
-{
-  switch (GET_CODE (op))
-    {
-    case CONST:
-      {
-	HOST_WIDE_INT addend;
-
-	/* Accept only (plus (symbol_ref) (const_int)).  */
-	op = XEXP (op, 0);
-	if (GET_CODE (op) != PLUS
-	    || GET_CODE (XEXP (op, 0)) != SYMBOL_REF
-            || GET_CODE (XEXP (op, 1)) != CONST_INT)
-	  return false;
-
-	addend = INTVAL (XEXP (op, 1));
-	op = XEXP (op, 0);
-
-	/* After reload, we want to allow any offset whatsoever.  This
-	   allows reload the opportunity to avoid spilling addresses to
-	   the stack, and instead simply substitute in the value from a
-	   REG_EQUIV.  We'll split this up again when splitting the insn.  */
-	if (reload_in_progress || reload_completed)
-	  return true;
-
-	/* Some symbol types we allow to use with any offset.  */
-	if (any_offset_symbol_operand (op, mode))
-	  return true;
-
-	/* Some symbol types we allow offsets with the low 14 bits of the
-	   constant forced to zero so that we do not use up so many GOT
-	   entries.  We want to prevent cse from undoing this.  */
-	if (aligned_offset_symbol_operand (op, mode))
-	  return (addend & 0x3fff) == 0;
-
-	/* The remaining symbol types may never be used with an offset.  */
-	return false;
-      }
-
-    default:
-      return true;
-    }
-})
+  (match_operand 0 "general_operand"))
 
 ;; True if OP is a register operand that is (or could be) a GR reg.
 (define_predicate "gr_register_operand"
Index: config/ia64/ia64.c
===================================================================
--- config/ia64/ia64.c	(revision 116011)
+++ config/ia64/ia64.c	(working copy)
@@ -782,18 +782,10 @@ ia64_depz_field_mask (rtx rop, rtx rshif
 static enum tls_model
 tls_symbolic_operand_type (rtx addr)
 {
-  enum tls_model tls_kind = 0;
-
-  if (GET_CODE (addr) == CONST)
-    {
-      if (GET_CODE (XEXP (addr, 0)) == PLUS
-	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
-        tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
-    }
-  else if (GET_CODE (addr) == SYMBOL_REF)
-    tls_kind = SYMBOL_REF_TLS_MODEL (addr);
-
-  return tls_kind;
+  if (GET_CODE (addr) == SYMBOL_REF)
+    return SYMBOL_REF_TLS_MODEL (addr);
+  else
+    return 0;
 }
 
 /* Return true if X is a constant that is valid for some immediate
@@ -813,7 +805,6 @@ ia64_legitimate_constant_p (rtx x)
 	return true;
       return CONST_DOUBLE_OK_FOR_G (x);
 
-    case CONST:
     case SYMBOL_REF:
       return tls_symbolic_operand_type (x) == 0;
 
@@ -868,43 +859,13 @@ ia64_expand_load_address (rtx dest, rtx 
     emit_insn (gen_load_gprel (dest, src));
   else
     {
-      HOST_WIDE_INT addend = 0;
       rtx tmp;
-
-      /* We did split constant offsets in ia64_expand_move, and we did try
-	 to keep them split in move_operand, but we also allowed reload to
-	 rematerialize arbitrary constants rather than spill the value to
-	 the stack and reload it.  So we have to be prepared here to split
-	 them apart again.  */
-      if (GET_CODE (src) == CONST)
-	{
-	  HOST_WIDE_INT hi, lo;
-
-	  hi = INTVAL (XEXP (XEXP (src, 0), 1));
-	  lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
-	  hi = hi - lo;
-
-	  if (lo != 0)
-	    {
-	      addend = lo;
-	      src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
-	    }
-	}
-
       tmp = gen_rtx_HIGH (Pmode, src);
       tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
       emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
-
       tmp = gen_rtx_LO_SUM (Pmode, dest, src);
       emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
-
-      if (addend)
-	{
-	  tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
-	  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
-	}
     }
-
   return true;
 }
 
@@ -927,12 +888,10 @@ gen_thread_pointer (void)
 }
 
 static rtx
-ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
-			 rtx orig_op1, HOST_WIDE_INT addend)
+ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1)
 {
   rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
   rtx orig_op0 = op0;
-  HOST_WIDE_INT addend_lo, addend_hi;
 
   switch (tls_kind)
     {
@@ -993,12 +952,6 @@ ia64_expand_tls_address (enum tls_model 
       break;
 
     case TLS_MODEL_INITIAL_EXEC:
-      addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
-      addend_hi = addend - addend_lo;
-
-      op1 = plus_constant (op1, addend_hi);
-      addend = addend_lo;
-
       tmp = gen_reg_rtx (Pmode);
       emit_insn (gen_load_tprel (tmp, op1));
 
@@ -1011,8 +964,6 @@ ia64_expand_tls_address (enum tls_model 
       if (!register_operand (op0, Pmode))
 	op0 = gen_reg_rtx (Pmode);
 
-      op1 = orig_op1;
-      addend = 0;
       if (TARGET_TLS64)
 	{
 	  emit_insn (gen_load_tprel (op0, op1));
@@ -1026,9 +977,6 @@ ia64_expand_tls_address (enum tls_model 
       gcc_unreachable ();
     }
 
-  if (addend)
-    op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
-			       orig_op0, 1, OPTAB_DIRECT);
   if (orig_op0 == op0)
     return NULL_RTX;
   if (GET_MODE (orig_op0) == Pmode)
@@ -1046,63 +994,18 @@ ia64_expand_move (rtx op0, rtx op1)
 
   if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
     {
-      HOST_WIDE_INT addend = 0;
       enum tls_model tls_kind;
-      rtx sym = op1;
 
-      if (GET_CODE (op1) == CONST
-	  && GET_CODE (XEXP (op1, 0)) == PLUS
-	  && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
-	{
-	  addend = INTVAL (XEXP (XEXP (op1, 0), 1));
-	  sym = XEXP (XEXP (op1, 0), 0);
-	}
-
-      tls_kind = tls_symbolic_operand_type (sym);
+      tls_kind = tls_symbolic_operand_type (op1);
       if (tls_kind)
-	return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
-
-      if (any_offset_symbol_operand (sym, mode))
-	addend = 0;
-      else if (aligned_offset_symbol_operand (sym, mode))
-	{
-	  HOST_WIDE_INT addend_lo, addend_hi;
-	      
-	  addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
-	  addend_hi = addend - addend_lo;
-
-	  if (addend_lo != 0)
-	    {
-	      op1 = plus_constant (sym, addend_hi);
-	      addend = addend_lo;
-	    }
-	  else
-	    addend = 0;
-	}
-      else
-	op1 = sym;
+	return ia64_expand_tls_address (tls_kind, op0, op1);
 
       if (reload_completed)
 	{
-	  /* We really should have taken care of this offset earlier.  */
-	  gcc_assert (addend == 0);
 	  if (ia64_expand_load_address (op0, op1))
 	    return NULL_RTX;
 	}
-
-      if (addend)
-	{
-	  rtx subtarget = no_new_pseudos ? op0 : gen_reg_rtx (mode);
-
-	  emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));
-
-	  op1 = expand_simple_binop (mode, PLUS, subtarget,
-				     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
-	  if (op0 == op1)
-	    return NULL_RTX;
-	}
     }
-
   return op1;
 }

