--- /dev/null
+! { dg-do compile }
+! { dg-options "-Ofast" }
+ SUBROUTINE foo (a,b,c,d,trigs,inc1,inc2,inc3,inc4,lot,n,la)  ! compile-only reproducer; reads a/b/trigs, writes c/d
+ IMPLICIT NONE (type, external)
+ INTEGER, PARAMETER :: wp = 8  ! kind of every REAL declared below
+ INTEGER, PARAMETER :: iwp = 4  ! kind of every INTEGER declared below
+ INTEGER(iwp) :: inc1
+ INTEGER(iwp) :: inc2
+ INTEGER(iwp) :: inc3
+ INTEGER(iwp) :: inc4
+ INTEGER(iwp) :: la
+ INTEGER(iwp) :: lot
+ INTEGER(iwp) :: n  ! dummy argument that the body never references
+
+ REAL(wp) :: a(*)
+ REAL(wp) :: b(*)
+ REAL(wp) :: c(*)
+ REAL(wp) :: d(*)
+ REAL(wp) :: trigs(*)
+
+ REAL(wp) :: c1
+ REAL(wp) :: c2
+ REAL(wp) :: s1
+ REAL(wp) :: s2
+ REAL(wp) :: sin60
+
+ INTEGER(iwp) :: i
+ INTEGER(iwp) :: ia
+ INTEGER(iwp) :: ib
+ INTEGER(iwp) :: ibase
+ INTEGER(iwp) :: ic
+ INTEGER(iwp) :: iink
+ INTEGER(iwp) :: ijk
+ INTEGER(iwp) :: j
+ INTEGER(iwp) :: ja
+ INTEGER(iwp) :: jb
+ INTEGER(iwp) :: jbase
+ INTEGER(iwp) :: jc
+ INTEGER(iwp) :: jink
+ INTEGER(iwp) :: jump
+ INTEGER(iwp) :: k
+ INTEGER(iwp) :: kb
+ INTEGER(iwp) :: kc
+ INTEGER(iwp) :: kstop
+ INTEGER(iwp) :: l
+ INTEGER(iwp) :: m
+
+ sin60=0.866025403784437_wp  ! sqrt(3)/2, i.e. sin(60 degrees)
+
+ ia = 1
+ ib = ia + (2*m-la)*inc1  ! local m is read here but never assigned in this routine
+ ic = ib  ! ib and ic start equal; they move apart by +/-iink after each k pass
+ ja = 1
+ jb = ja + jink  ! local jink is never assigned in this routine
+ jc = jb + jink
+
+ DO k = la, kstop, la  ! local kstop is never assigned in this routine
+ kb = k + k  ! kb = 2*k
+ kc = kb + kb  ! kc = 4*k
+ c1 = trigs(kb+1)  ! presumably cosine/sine pairs from a trig table -- TODO confirm
+ s1 = trigs(kb+2)
+ c2 = trigs(kc+1)
+ s2 = trigs(kc+2)
+ ibase = 0
+ DO l = 1, la
+ i = ibase
+ j = jbase  ! jbase is only ever incremented below; it has no initial assignment
+ DO ijk = 1, lot  ! lot passes; i advances by inc3 and j by inc4 per pass
+ c(ja+j) = a(ia+i) + (a(ib+i)+a(ic+i))
+ d(ja+j) = b(ia+i) + (b(ib+i)-b(ic+i))
+ c(jb+j) = c1*((a(ia+i)-0.5_wp*(a(ib+i)+a(ic+i)))-(sin60*(b(ib+i)+ &
+ & b(ic+i)))) &
+ & - s1*((b(ia+i)-0.5_wp*(b(ib+i)-b(ic+i)))+(sin60*(a(ib+i)- &
+ & a(ic+i))))
+ d(jb+j) = s1*((a(ia+i)-0.5_wp*(a(ib+i)+a(ic+i)))-(sin60*(b(ib+i)+ &
+ & b(ic+i)))) &
+ & + c1*((b(ia+i)-0.5_wp*(b(ib+i)-b(ic+i)))+(sin60*(a(ib+i)- &
+ & a(ic+i))))
+ c(jc+j) = c2*((a(ia+i)-0.5_wp*(a(ib+i)+a(ic+i)))+(sin60*(b(ib+i)+ &
+ & b(ic+i)))) &
+ & - s2*((b(ia+i)-0.5_wp*(b(ib+i)-b(ic+i)))-(sin60*(a(ib+i)- &
+ & a(ic+i))))  ! note: this loop has no matching d(jc+j) store
+ i = i + inc3
+ j = j + inc4
+ END DO
+ ibase = ibase + inc1
+ jbase = jbase + inc2
+ END DO
+ ia = ia + iink  ! local iink is never assigned in this routine
+ ib = ib + iink
+ ic = ic - iink  ! ic steps in the opposite direction to ia and ib
+ jbase = jbase + jump  ! local jump is never assigned in this routine
+ END DO
+ END
perm = vertices[idx].perm_out;
else
{
+ bool any_succ_perm_out_m1 = false;
perm = vertices[idx].get_perm_in ();
for (graph_edge *succ = slpg->vertices[idx].succ;
succ; succ = succ->succ_next)
For example see gcc.dg/vect/bb-slp-14.c for a case
that would break. */
if (succ_perm == -1)
- continue;
+ {
+ /* When we handled a non-leaf optimistically, note
+ that so we can adjust its outgoing permute below. */
+ slp_tree succ_node = vertices[succ_idx].node;
+ if (SLP_TREE_DEF_TYPE (succ_node) != vect_external_def
+ && SLP_TREE_DEF_TYPE (succ_node) != vect_constant_def)
+ any_succ_perm_out_m1 = true;
+ continue;
+ }
if (perm == -1)
perm = succ_perm;
else if (succ_perm == 0
}
}
- /* If this is a node we do not want to eventually unshare
- but it can be permuted at will, verify all users have
- the same permutations registered and otherwise drop to
- zero. */
- if (perm == -1
- && SLP_TREE_DEF_TYPE (node) != vect_external_def
- && SLP_TREE_DEF_TYPE (node) != vect_constant_def)
+ /* Adjust any incoming permutes we treated optimistically. */
+ if (perm != -1 && any_succ_perm_out_m1)
{
- int preds_perm = -1;
- for (graph_edge *pred = slpg->vertices[idx].pred;
- pred; pred = pred->pred_next)
+ for (graph_edge *succ = slpg->vertices[idx].succ;
+ succ; succ = succ->succ_next)
{
- int pred_perm = vertices[pred->src].get_perm_in ();
- if (preds_perm == -1)
- preds_perm = pred_perm;
- else if (!vect_slp_perms_eq (perms,
- pred_perm, preds_perm))
- perm = 0;
+ slp_tree succ_node = vertices[succ->dest].node;
+ if (vertices[succ->dest].perm_out == -1
+ && SLP_TREE_DEF_TYPE (succ_node) != vect_external_def
+ && SLP_TREE_DEF_TYPE (succ_node) != vect_constant_def)
+ vertices[succ->dest].perm_out = perm;
}
+ changed = true;
}
if (!vect_slp_perms_eq (perms, perm,
}
}
while (changed);
- statistics_counter_event (cfun, "SLP optimize perm iterations", iteration);
-
- /* Compute pre-order. */
- auto_vec<int> heads;
- heads.reserve (vinfo->slp_instances.length ());
- for (slp_instance inst : vinfo->slp_instances)
- heads.quick_push (SLP_INSTANCE_TREE (inst)->vertex);
- auto_vec<int> po;
- graphds_dfs (slpg, &heads[0], heads.length (), &po, true, NULL, NULL);
-
- /* Propagate materialized permutes to "any" permute nodes. For heads
- ending up as "any" (reductions with just invariants), set them to
- no permute. */
- for (int idx : heads)
- if (vertices[idx].perm_out == -1)
- vertices[idx].perm_out = 0;
- for (i = po.length (); i > 0; --i)
- {
- int idx = po[i-1];
- int perm_in = vertices[idx].get_perm_in ();
- slp_tree node = vertices[idx].node;
- if (SLP_TREE_DEF_TYPE (node) == vect_external_def
- || SLP_TREE_DEF_TYPE (node) == vect_constant_def)
- continue;
- gcc_assert (perm_in != -1);
- for (graph_edge *succ = slpg->vertices[idx].succ;
- succ; succ = succ->succ_next)
- {
- slp_tree succ_node = vertices[succ->dest].node;
- if (SLP_TREE_DEF_TYPE (succ_node) == vect_external_def
- || SLP_TREE_DEF_TYPE (succ_node) == vect_constant_def)
- continue;
- if (vertices[succ->dest].perm_out == -1)
- vertices[succ->dest].perm_out = perm_in;
- else
- /* Propagation should have ensured that all preds have the same
- permutation. */
- gcc_assert (vect_slp_perms_eq (perms, perm_in,
- vertices[succ->dest].perm_out));
- }
- }
+ statistics_histogram_event (cfun, "SLP optimize perm iterations", iteration);
/* Materialize. */
for (i = 0; i < vertices.length (); ++i)