This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Fix PR68502
- From: Richard Biener <rguenther at suse dot de>
- To: gcc-patches at gcc dot gnu dot org
- Date: Tue, 24 Nov 2015 14:19:16 +0100 (CET)
- Subject: [PATCH] Fix PR68502
- Authentication-results: sourceware.org; auth=none
This restores a check I removed when adding strided group accesses,
together with a better explanation and a FIXME comment on how to recover
the missed optimizations (not appropriate at this stage).
I've added a realistic testcase from 179.art as well as an
artificial one that also fails on x86_64.
Bootstrap and regtest are running on x86_64-unknown-linux-gnu.
Richard.
2015-11-24 Richard Biener <rguenther@suse.de>
PR tree-optimization/68502
* tree-vect-data-refs.c (vect_analyze_group_access_1): Restore
check that the step is a multiple of the type size.
* gcc.dg/vect/pr68502-1.c: New testcase.
* gcc.dg/vect/pr68502-2.c: Likewise.
Index: gcc/tree-vect-data-refs.c
===================================================================
*** gcc/tree-vect-data-refs.c (revision 230793)
--- gcc/tree-vect-data-refs.c (working copy)
*************** vect_analyze_group_access_1 (struct data
*** 2176,2181 ****
--- 2198,2224 ----
if (tree_fits_shwi_p (step))
{
dr_step = tree_to_shwi (step);
+ /* Check that STEP is a multiple of type size. Otherwise there is
+ a non-element-sized gap at the end of the group which we
+ cannot represent in GROUP_GAP or GROUP_SIZE.
+ ??? As we can handle non-constant step fine here we should
+ simply remove uses of GROUP_GAP between the last and first
+ element and instead rely on DR_STEP. GROUP_SIZE then would
+ simply not include that gap. */
+ if ((dr_step % type_size) != 0)
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Step ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, step);
+ dump_printf (MSG_NOTE,
+ " is not a multiple of the element size for ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr));
+ dump_printf (MSG_NOTE, "\n");
+ }
+ return false;
+ }
groupsize = absu_hwi (dr_step) / type_size;
}
else
Index: gcc/testsuite/gcc.dg/vect/pr68502-1.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/pr68502-1.c (revision 0)
--- gcc/testsuite/gcc.dg/vect/pr68502-1.c (working copy)
***************
*** 0 ****
--- 1,48 ----
+ #include <stdlib.h>
+ #include "tree-vect.h"
+
+ typedef struct {
+ double *I;
+ double W;
+ double X;
+ double V;
+ double U;
+ double P;
+ double Q;
+ double R;
+ } f1_neuron;
+
+ f1_neuron *f1_layer;
+
+ int numf1s = 1000;
+
+ void __attribute__((noinline,noclone))
+ reset_nodes()
+ {
+ int i;
+
+ for (i=0;i<numf1s;i++)
+ {
+ f1_layer[i].W = 0.0;
+ f1_layer[i].X = 0.0;
+ f1_layer[i].V = 0.0;
+ f1_layer[i].U = 0.0;
+ f1_layer[i].P = 0.0;
+ f1_layer[i].Q = 0.0;
+ f1_layer[i].R = 0.0;
+ }
+ }
+
+ int main ()
+ {
+ int i;
+ check_vect ();
+ f1_layer = (f1_neuron *)malloc (numf1s * sizeof (f1_neuron));
+ for (i = 0; i < numf1s; i++)
+ f1_layer[i].I = (double *)-1;
+ reset_nodes ();
+ for (i = 0; i < numf1s; i++)
+ if (f1_layer[i].I != (double *)-1)
+ abort ();
+ return 0;
+ }
Index: gcc/testsuite/gcc.dg/vect/pr68502-2.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/pr68502-2.c (revision 0)
--- gcc/testsuite/gcc.dg/vect/pr68502-2.c (working copy)
***************
*** 0 ****
--- 1,48 ----
+ #include <stdlib.h>
+ #include "tree-vect.h"
+
+ typedef struct {
+ short I;
+ int W;
+ int X;
+ int V;
+ int U;
+ int P;
+ int Q;
+ int R;
+ } __attribute__((packed)) f1_neuron;
+
+ f1_neuron *f1_layer;
+
+ int numf1s = 1000;
+
+ void __attribute__((noinline,noclone))
+ reset_nodes()
+ {
+ int i;
+
+ for (i=0;i<numf1s;i++)
+ {
+ f1_layer[i].W = 0;
+ f1_layer[i].X = 0;
+ f1_layer[i].V = 0;
+ f1_layer[i].U = 0;
+ f1_layer[i].P = 0;
+ f1_layer[i].Q = 0;
+ f1_layer[i].R = 0;
+ }
+ }
+
+ int main ()
+ {
+ int i;
+ check_vect ();
+ f1_layer = (f1_neuron *)malloc (numf1s * sizeof (f1_neuron));
+ for (i = 0; i < numf1s; i++)
+ f1_layer[i].I = -1;
+ reset_nodes ();
+ for (i = 0; i < numf1s; i++)
+ if (f1_layer[i].I != -1)
+ abort ();
+ return 0;
+ }