This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: Fix GCC bug causing bootstrap failure with vectorizer turned on
- From: Richard Biener <richard dot guenther at gmail dot com>
- To: Cong Hou <congh at google dot com>,gcc-patches at gcc dot gnu dot org
- Cc: David Li <davidxl at google dot com>
- Date: Sat, 13 Jul 2013 11:06:57 +0200
- Subject: Re: Fix GCC bug causing bootstrap failure with vectorizer turned on
- References: <CAK=A3=3vpmrMex1++0p54Z_8f6fHATDLZE+1tAWhKBtXdpdi+g at mail dot gmail dot com>
Cong Hou <congh@google.com> wrote:
>GCC bootstrap failed with loop vectorizer turned on by default at O2.
>The symptom is that the comparison between stage2&3 compilers fails.
>The root cause is a bug in the file "tree-vect-data-refs.c", where a
>qsort() function call is used to sort a group of data references using
>a comparison function called "dr_group_sort_cmp()". In this function,
>the iterative hash values of tree nodes are used for comparisons. For
>a declaration tree node, its UID participates in the calculation of
>the hash value. However, a specific declaration may have different
>UIDs whether the debug information is switched on/off (-gtoggle). In
>consequence, the results of comparisons may vary in stage 2&3 during
>bootstrapping.
>
>The following patch fixed the bug. Compiler bootstraps and there is no
>regressions in regression test. Compiler also bootstraps fine when
>turning on vectorizer by default. Since this patch may produce
>difference result (but still correct) than before due to the
>modification to the comparison function, four test cases are adjusted
>accordingly. OK for trunk?
Where does the order of DRs affect code generation? I think there is the correct place to fix this bug.
Richard.
>
>Cong
>
>
>
>Index: gcc/testsuite/gcc.target/i386/l_fma_double_1.c
>===================================================================
>--- gcc/testsuite/gcc.target/i386/l_fma_double_1.c (revision 200893)
>+++ gcc/testsuite/gcc.target/i386/l_fma_double_1.c (working copy)
>@@ -10,9 +10,9 @@
> #include "l_fma_1.h"
>
> /* { dg-final { scan-assembler-times "vfmadd132pd" 4 } } */
>-/* { dg-final { scan-assembler-times "vfmadd213pd" 4 } } */
>+/* { dg-final { scan-assembler-times "vfmadd231pd" 4 } } */
> /* { dg-final { scan-assembler-times "vfmsub132pd" 4 } } */
>-/* { dg-final { scan-assembler-times "vfmsub213pd" 4 } } */
>+/* { dg-final { scan-assembler-times "vfmsub231pd" 4 } } */
> /* { dg-final { scan-assembler-times "vfnmadd132pd" 4 } } */
> /* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */
> /* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */
>Index: gcc/testsuite/gcc.target/i386/l_fma_float_1.c
>===================================================================
>--- gcc/testsuite/gcc.target/i386/l_fma_float_1.c (revision 200893)
>+++ gcc/testsuite/gcc.target/i386/l_fma_float_1.c (working copy)
>@@ -9,9 +9,9 @@
> #include "l_fma_1.h"
>
> /* { dg-final { scan-assembler-times "vfmadd132ps" 4 } } */
>-/* { dg-final { scan-assembler-times "vfmadd213ps" 4 } } */
>+/* { dg-final { scan-assembler-times "vfmadd231ps" 4 } } */
> /* { dg-final { scan-assembler-times "vfmsub132ps" 4 } } */
>-/* { dg-final { scan-assembler-times "vfmsub213ps" 4 } } */
>+/* { dg-final { scan-assembler-times "vfmsub231ps" 4 } } */
> /* { dg-final { scan-assembler-times "vfnmadd132ps" 4 } } */
> /* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */
> /* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */
>Index: gcc/testsuite/gcc.target/i386/l_fma_double_3.c
>===================================================================
>--- gcc/testsuite/gcc.target/i386/l_fma_double_3.c (revision 200893)
>+++ gcc/testsuite/gcc.target/i386/l_fma_double_3.c (working copy)
>@@ -10,9 +10,9 @@
> #include "l_fma_3.h"
>
> /* { dg-final { scan-assembler-times "vfmadd132pd" 4 } } */
>-/* { dg-final { scan-assembler-times "vfmadd213pd" 4 } } */
>+/* { dg-final { scan-assembler-times "vfmadd231pd" 4 } } */
> /* { dg-final { scan-assembler-times "vfmsub132pd" 4 } } */
>-/* { dg-final { scan-assembler-times "vfmsub213pd" 4 } } */
>+/* { dg-final { scan-assembler-times "vfmsub231pd" 4 } } */
> /* { dg-final { scan-assembler-times "vfnmadd132pd" 4 } } */
> /* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */
> /* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */
>Index: gcc/testsuite/gcc.target/i386/l_fma_float_3.c
>===================================================================
>--- gcc/testsuite/gcc.target/i386/l_fma_float_3.c (revision 200893)
>+++ gcc/testsuite/gcc.target/i386/l_fma_float_3.c (working copy)
>@@ -9,9 +9,9 @@
> #include "l_fma_3.h"
>
> /* { dg-final { scan-assembler-times "vfmadd132ps" 4 } } */
>-/* { dg-final { scan-assembler-times "vfmadd213ps" 4 } } */
>+/* { dg-final { scan-assembler-times "vfmadd231ps" 4 } } */
> /* { dg-final { scan-assembler-times "vfmsub132ps" 4 } } */
>-/* { dg-final { scan-assembler-times "vfmsub213ps" 4 } } */
>+/* { dg-final { scan-assembler-times "vfmsub231ps" 4 } } */
> /* { dg-final { scan-assembler-times "vfnmadd132ps" 4 } } */
> /* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */
> /* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */
>Index: gcc/tree-vect-data-refs.c
>===================================================================
>--- gcc/tree-vect-data-refs.c (revision 200893)
>+++ gcc/tree-vect-data-refs.c (working copy)
>@@ -2311,6 +2311,80 @@
> return vect_analyze_group_access (dr);
> }
>
>+
>+
>+/* Compare two tree nodes. This function is used to compare two
>+ data-references as below. */
>+
>+static int
>+compare_tree (tree t1, tree t2)
>+{
>+ int i, cmp;
>+ enum tree_code code;
>+ char tclass;
>+
>+ if (t1 == t2)
>+ return 0;
>+ if (t1 == NULL)
>+ return -1;
>+ if (t2 == NULL)
>+ return 1;
>+
>+
>+ if (TREE_CODE (t1) != TREE_CODE (t2))
>+ return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
>+
>+ code = TREE_CODE (t1);
>+ switch (code)
>+ {
>+ /* For const values, we can just use hash values for comparisons.
>*/
>+ case INTEGER_CST:
>+ case REAL_CST:
>+ case FIXED_CST:
>+ case STRING_CST:
>+ case COMPLEX_CST:
>+ case VECTOR_CST:
>+ {
>+ hashval_t h1 = iterative_hash_expr (t1, 0);
>+ hashval_t h2 = iterative_hash_expr (t2, 0);
>+ if (h1 != h2)
>+ return h1 < h2 ? -1 : 1;
>+ break;
>+ }
>+
>+ case SSA_NAME:
>+ cmp = compare_tree (SSA_NAME_VAR (t1), SSA_NAME_VAR (t2));
>+ if (cmp != 0)
>+ return cmp;
>+
>+ if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
>+ return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
>+ break;
>+
>+ default:
>+ tclass = TREE_CODE_CLASS (code);
>+
>+ /* For var-decl, we could compare their UIDs. */
>+ if (tclass == tcc_declaration)
>+ {
>+ if (DECL_UID (t1) != DECL_UID (t2))
>+ return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
>+ break;
>+ }
>+
>+ /* For expressions with operands, compare their operands
>recursively. */
>+ for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
>+ {
>+ cmp = compare_tree (TREE_OPERAND (t1, i), TREE_OPERAND (t2, i));
>+ if (cmp != 0)
>+ return cmp;
>+ }
>+ }
>+
>+ return 0;
>+}
>+
>+
> /* Compare two data-references DRA and DRB to group them into chunks
> suitable for grouping. */
>
>@@ -2319,7 +2393,6 @@
> {
> data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
> data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
>- hashval_t h1, h2;
> int cmp;
>
> /* Stabilize sort. */
>@@ -2329,19 +2402,17 @@
> /* Ordering of DRs according to base. */
>if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0))
> {
>- h1 = iterative_hash_expr (DR_BASE_ADDRESS (dra), 0);
>- h2 = iterative_hash_expr (DR_BASE_ADDRESS (drb), 0);
>- if (h1 != h2)
>- return h1 < h2 ? -1 : 1;
>+ cmp = compare_tree (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS
>(drb));
>+ if (cmp != 0)
>+ return cmp;
> }
>
> /* And according to DR_OFFSET. */
> if (!dr_equal_offsets_p (dra, drb))
> {
>- h1 = iterative_hash_expr (DR_OFFSET (dra), 0);
>- h2 = iterative_hash_expr (DR_OFFSET (drb), 0);
>- if (h1 != h2)
>- return h1 < h2 ? -1 : 1;
>+ cmp = compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
>+ if (cmp != 0)
>+ return cmp;
> }
>
> /* Put reads before writes. */
>@@ -2352,19 +2423,18 @@
> if (!operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
> TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))), 0))
> {
>- h1 = iterative_hash_expr (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF
>(dra))), 0);
>- h2 = iterative_hash_expr (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF
>(drb))), 0);
>- if (h1 != h2)
>- return h1 < h2 ? -1 : 1;
>+ cmp = compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
>+ TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
>+ if (cmp != 0)
>+ return cmp;
> }
>
> /* And after step. */
> if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
> {
>- h1 = iterative_hash_expr (DR_STEP (dra), 0);
>- h2 = iterative_hash_expr (DR_STEP (drb), 0);
>- if (h1 != h2)
>- return h1 < h2 ? -1 : 1;
>+ cmp = compare_tree (DR_STEP (dra), DR_STEP (drb));
>+ if (cmp != 0)
>+ return cmp;
> }
>
> /* Then sort after DR_INIT. In case of identical DRs sort after
>stmt UID. */