This is the mail archive of the
gcc-bugs@gcc.gnu.org
mailing list for the GCC project.
[Bug tree-optimization/57634] New: Missed vectorization for a "fixed point multiplication" reduction
- From: "vincenzo.innocente at cern dot ch" <gcc-bugzilla at gcc dot gnu dot org>
- To: gcc-bugs at gcc dot gnu dot org
- Date: Mon, 17 Jun 2013 08:58:05 +0000
- Subject: [Bug tree-optimization/57634] New: Missed vectorization for a "fixed point multiplication" reduction
- Auto-submitted: auto-generated
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=57634
Bug ID: 57634
Summary: Missed vectorization for a "fixed point
multiplication" reduction
Product: gcc
Version: 4.9.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: vincenzo.innocente at cern dot ch
In the following code the loop in "red" does not vectorize "because" of
note: reduction: not commutative/associative: s_12 = (unsigned int) _11;
If I use "unsigned long long" everywhere, as in redL, the reason becomes
reduction: not commutative/associative: s_10 = temp_9 >> 23;
The multiplication in itself vectorizes (for unsigned!)
compiled as
c++ -std=c++11 -march=corei7-avx -mavx2 -Ofast -S FixedF.cc
-ftree-vectorizer-verbose=2 -Wall
with gcc version 4.9.0 20130607 (experimental) [trunk revision 199812] (GCC)
/**
 * Fixed-point multiplication with Q = 23 fractional bits.
 *
 * Computes (a * b + 2^(Q-1)) >> Q using an unsigned long long
 * intermediate, then narrows the result back to unsigned int.
 *
 * @param a  first fixed-point operand
 * @param b  second fixed-point operand
 * @return   the rounded, rescaled product (mid values are rounded up)
 */
inline
unsigned int mult(unsigned int a, unsigned int b) {
    typedef unsigned long long ull;  // wide intermediate (to support >>)
    // Original note: a and b are of the form 1.m with m of Q bits; as int
    // each is therefore at most 2^(Q+2)-1, so a*b is < 2^(2*(Q+2)).
    constexpr int Q = 23;
    constexpr ull K = 1ULL << (Q - 1);  // half of one unit in the last kept place
    ull temp = static_cast<ull>(a) * static_cast<ull>(b);  // widen before multiplying
    // Rounding; mid values are rounded up.
    temp += K;
    // Correct by dividing by base (2^Q); narrowing to unsigned int is intended.
    return static_cast<unsigned int>(temp >> Q);
}
/**
 * Fixed-point multiplication with Q = 23 fractional bits, carried out
 * entirely in unsigned long long.
 *
 * Computes (a * b + 2^(Q-1)) >> Q. Unlike mult, operands and result
 * are unsigned long long, so no narrowing conversion takes place.
 *
 * @param a  first fixed-point operand
 * @param b  second fixed-point operand
 * @return   the rounded, rescaled product (mid values are rounded up)
 */
inline
unsigned long long multL(unsigned long long a, unsigned long long b) {
    typedef unsigned long long ull;  // (to support >>)
    // Original note: a and b are of the form 1.m with m of Q bits; as int
    // each is therefore at most 2^(Q+2)-1, so a*b is < 2^(2*(Q+2)).
    constexpr int Q = 23;
    constexpr ull K = 1ULL << (Q - 1);  // half of one unit in the last kept place
    ull temp = a * b;  // operands are already unsigned long long
    // Rounding; mid values are rounded up.
    temp += K;
    // Correct by dividing by base (2^Q).
    return temp >> Q;
}
// 32-bit operand/result arrays, 1024 elements each.
unsigned int a[1024], b[1024], c[1024];
// 64-bit counterparts, 1024 elements each.
unsigned long long al[1024], bl[1024], cl[1024];
// Element-wise fixed-point product: c[i] = mult(a[i], b[i]) for every element.
void foo() {
    // a, b and c hold 1024 elements; the previous bound of 1204 indexed past
    // the end of all three arrays (undefined behaviour).
    for (int i = 0; i != 1024; ++i) {
        c[i] = mult(a[i], b[i]);
    }
}
// Reduction over b using the fixed-point multiply: s = mult(s, b[i]),
// starting from s = 1. Returns the final accumulator.
unsigned int red() {
    unsigned int s = 1;
    // b holds 1024 elements; the previous bound of 1204 read past its end
    // (undefined behaviour).
    for (int i = 0; i != 1024; ++i) {
        s = mult(s, b[i]);
    }
    return s;
}
// Reduction using the unsigned long long fixed-point multiply:
// s = multL(s, b[i]), starting from s = 1. Returns the final accumulator.
unsigned long long redL() {
    unsigned long long s = 1;
    // b holds 1024 elements; the previous bound of 1204 read past its end
    // (undefined behaviour).
    // NOTE(review): this reads the unsigned int array b, not bl, so each
    // element is widened on the way into multL -- confirm whether bl was
    // intended.
    for (int i = 0; i != 1024; ++i) {
        s = multL(s, b[i]);
    }
    return s;
}
// Plain unsigned product reduction over b: s = s * b[i], starting from s = 1.
// Unsigned arithmetic wraps modulo 2^N, so overflow here is well defined.
unsigned int prod() {
    unsigned int s = 1;
    // b holds 1024 elements; the previous bound of 1204 read past its end
    // (undefined behaviour).
    for (int i = 0; i != 1024; ++i) {
        s = s * b[i];
    }
    return s;
}