This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[vta] Some further speedups
- From: Jakub Jelinek <jakub at redhat dot com>
- To: Alexandre Oliva <aoliva at redhat dot com>, Richard Henderson <rth at redhat dot com>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Tue, 30 Jun 2009 13:48:49 +0200
- Subject: [vta] Some further speedups
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
This patch on top of
http://gcc.gnu.org/ml/gcc-patches/2009-06/msg01972.html
http://gcc.gnu.org/ml/gcc-patches/2009-06/msg02019.html
http://gcc.gnu.org/ml/gcc-patches/2009-06/msg02224.html
speeds up set_slot_part, where in some cases chains had average length 35
and so calling loc_cmp each time when we already know the lists are sorted
was very expensive.
Total callgrind events in set_slot_part decreased on HTML_401F.class from
295,830,806,997 /usr/src/vta/obj/gcc/../../gcc/var-tracking.c:set_slot_part [/usr/src/vta/obj/gcc/jc1.v04]
to
78,907,010,112 /usr/src/vta/obj/gcc/../../gcc/var-tracking.c:set_slot_part [/usr/src/vta/obj/gcc/jc1.v05]
See ftp://ultra.linux.cz/private/HTML_401F.class/jc1.v04
for the full callgrind dump before this patch and
ftp://ultra.linux.cz/private/HTML_401F.class/jc1.v05
for the full callgrind dump after this patch.
2009-06-30 Jakub Jelinek <jakub@redhat.com>
* var-tracking.c (tie_break_pointers): Move earlier.
(canon_value_cmp): Likewise. Add inline keyword.
(variable_union): Guard expensive assert with ENABLE_CHECKING.
(set_slot_part): Optimize search for onepart location in the chain.
--- gcc/var-tracking.c.jj 2009-06-26 15:09:45.000000000 +0200
+++ gcc/var-tracking.c 2009-06-30 09:43:38.000000000 +0200
@@ -1104,6 +1104,41 @@ shared_hash_find (shared_hash vars, decl
return shared_hash_find_1 (vars, dv, dv_htab_hash (dv));
}
+/* Determine a total order between two distinct pointers. Compare the
+ pointers as integral types if size_t is wide enough, otherwise
+ resort to bitwise memory compare. The actual order does not
+ matter, we just need to be consistent, so endianness is
+ irrelevant. */
+
+static int
+tie_break_pointers (const void *p1, const void *p2)
+{
+ gcc_assert (p1 != p2);
+
+ if (sizeof (size_t) >= sizeof (void*))
+ return (size_t)p1 < (size_t)p2 ? -1 : 1;
+ else
+ return memcmp (&p1, &p2, sizeof (p1));
+}
+
+/* Return true if TVAL is better than CVAL as a canonival value. We
+ choose lowest-numbered VALUEs, using the RTX address as a
+ tie-breaker. The idea is to arrange them into a star topology,
+ such that all of them are at most one step away from the canonical
+ value, and the canonical value has backlinks to all of them, in
+ addition to all the actual locations. We don't enforce this
+ topology throughout the entire dataflow analysis, though.
+ */
+
+static inline bool
+canon_value_cmp (rtx tval, rtx cval)
+{
+ return !cval
+ || CSELIB_VAL_PTR (tval)->value < CSELIB_VAL_PTR (cval)->value
+ || (CSELIB_VAL_PTR (tval)->value == CSELIB_VAL_PTR (cval)->value
+ && tie_break_pointers (tval, cval) < 0);
+}
+
static bool dst_can_be_shared;
/* Return a copy of a variable VAR and insert it to dataflow set SET. */
@@ -1774,8 +1809,10 @@ variable_union (void **slot, void *data)
nnode->next = dnode;
dnode = nnode;
}
+#ifdef ENABLE_CHECKING
else if (r == 0)
gcc_assert (rtx_equal_p (dnode->loc, snode->loc));
+#endif
if (r >= 0)
snode = snode->next;
@@ -2319,41 +2356,6 @@ intersect_loc_chains (rtx val, location_
}
}
-/* Determine a total order between two distinct pointers. Compare the
- pointers as integral types if size_t is wide enough, otherwise
- resort to bitwise memory compare. The actual order does not
- matter, we just need to be consistent, so endianness is
- irrelevant. */
-
-static int
-tie_break_pointers (const void *p1, const void *p2)
-{
- gcc_assert (p1 != p2);
-
- if (sizeof (size_t) >= sizeof (void*))
- return (size_t)p1 < (size_t)p2 ? -1 : 1;
- else
- return memcmp (&p1, &p2, sizeof (p1));
-}
-
-/* Return true if TVAL is better than CVAL as a canonival value. We
- choose lowest-numbered VALUEs, using the RTX address as a
- tie-breaker. The idea is to arrange them into a star topology,
- such that all of them are at most one step away from the canonical
- value, and the canonical value has backlinks to all of them, in
- addition to all the actual locations. We don't enforce this
- topology throughout the entire dataflow analysis, though.
- */
-
-static bool
-canon_value_cmp (rtx tval, rtx cval)
-{
- return !cval
- || CSELIB_VAL_PTR (tval)->value < CSELIB_VAL_PTR (cval)->value
- || (CSELIB_VAL_PTR (tval)->value == CSELIB_VAL_PTR (cval)->value
- && tie_break_pointers (tval, cval) < 0);
-}
-
/* Return -1 if X should be before Y in a location list for a 1-part
variable, 1 if Y should be before X, and 0 if they're equivalent
and should not appear in the list. */
@@ -5686,12 +5688,82 @@ set_slot_part (dataflow_set *set, rtx lo
pos = 0;
- for (nextp = &var->var_part[0].loc_chain; (node = *nextp);
- nextp = &node->next)
- if ((r = loc_cmp (node->loc, loc)) >= 0)
- break;
- else
- c++;
+ if (GET_CODE (loc) == VALUE)
+ {
+ for (nextp = &var->var_part[0].loc_chain; (node = *nextp);
+ nextp = &node->next)
+ if (GET_CODE (node->loc) == VALUE)
+ {
+ if (node->loc == loc)
+ {
+ r = 0;
+ break;
+ }
+ if (canon_value_cmp (node->loc, loc))
+ c++;
+ else
+ {
+ r = 1;
+ break;
+ }
+ }
+ else if (REG_P (node->loc) || MEM_P (node->loc))
+ c++;
+ else
+ {
+ r = 1;
+ break;
+ }
+ }
+ else if (REG_P (loc))
+ {
+ for (nextp = &var->var_part[0].loc_chain; (node = *nextp);
+ nextp = &node->next)
+ if (REG_P (node->loc))
+ {
+ if (REGNO (node->loc) < REGNO (loc))
+ c++;
+ else
+ {
+ if (REGNO (node->loc) == REGNO (loc))
+ r = 0;
+ else
+ r = 1;
+ break;
+ }
+ }
+ else
+ {
+ r = 1;
+ break;
+ }
+ }
+ else if (MEM_P (loc))
+ {
+ for (nextp = &var->var_part[0].loc_chain; (node = *nextp);
+ nextp = &node->next)
+ if (REG_P (node->loc))
+ c++;
+ else if (MEM_P (node->loc))
+ {
+ if ((r = loc_cmp (XEXP (node->loc, 0), XEXP (loc, 0))) >= 0)
+ break;
+ else
+ c++;
+ }
+ else
+ {
+ r = 1;
+ break;
+ }
+ }
+ else
+ for (nextp = &var->var_part[0].loc_chain; (node = *nextp);
+ nextp = &node->next)
+ if ((r = loc_cmp (node->loc, loc)) >= 0)
+ break;
+ else
+ c++;
if (r == 0)
return;
Jakub