This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

[Bug tree-optimization/57634] New: Missed vectorization for a "fixed point multiplication" reduction

From: "vincenzo.innocente at cern dot ch" <gcc-bugzilla at gcc dot gnu dot org>
To: gcc-bugs at gcc dot gnu dot org
Date: Mon, 17 Jun 2013 08:58:05 +0000
Subject: [Bug tree-optimization/57634] New: Missed vectorization for a "fixed point multiplication" reduction
Auto-submitted: auto-generated

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=57634

            Bug ID: 57634
           Summary: Missed vectorization for a "fixed point
                    multiplication" reduction
           Product: gcc
           Version: 4.9.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: vincenzo.innocente at cern dot ch

In the following code the loop in "red" does not vectorize because of
note: reduction: not commutative/associative: s_12 = (unsigned int) _11;
If I use "unsigned long long" everywhere, as in redL, the reason becomes
reduction: not commutative/associative: s_10 = temp_9 >> 23;

The multiplication in itself vectorizes (for unsigned!)

compiled as
c++ -std=c++11 -march=corei7-avx -mavx2 -Ofast -S FixedF.cc
-ftree-vectorizer-verbose=2 -Wall
with gcc version 4.9.0 20130607 (experimental) [trunk revision 199812] (GCC) 


// Fixed-point multiply with Q = 23 fractional bits, rounded to nearest
// (mid values are rounded up).
//
// The product is formed in 64 bits, half a unit in the last place
// (2^(Q-1)) is added, and the sum is shifted right by Q before being
// narrowed back to unsigned int.
//
// Operand range as stated by the original author: a and b are of the form
// 1.m with m of Q bits; as integers they are therefore at most 2^(Q+2)-1,
// so a*b is < 2^(2*(Q+2)), which fits in the 64-bit intermediate.
inline
unsigned int mult(unsigned int a, unsigned int b) {
  typedef unsigned long long ull; // wide type so the full product survives until >>
  constexpr int Q = 23;
  constexpr ull K = (1ull << (Q-1)); // rounding constant: half of 2^Q
  // Widen both operands first; otherwise the multiply would be done in 32 bits.
  ull temp = static_cast<ull>(a) * static_cast<ull>(b);
  // Rounding; mid values are rounded up
  temp += K;
  // Correct by dividing by base; the narrowing to 32 bits is intentional.
  return static_cast<unsigned int>(temp >> Q);
}

// 64-bit variant of the Q = 23 fixed-point multiply, rounded to nearest
// (mid values are rounded up).
//
// Operands, intermediate and result are all unsigned long long: the
// product is formed, half a unit in the last place (2^(Q-1)) is added, and
// the sum is shifted right by Q.
//
// Operand range as stated by the original author: a and b are of the form
// 1.m with m of Q bits; as integers they are therefore at most 2^(Q+2)-1,
// so a*b is < 2^(2*(Q+2)). Larger operands wrap modulo 2^64 in the product.
inline
unsigned long long multL(unsigned long long a, unsigned long long b) {
  typedef unsigned long long ull;
  constexpr int Q = 23;
  constexpr ull K = (1ull << (Q-1)); // rounding constant: half of 2^Q
  ull temp = a * b;
  // Rounding; mid values are rounded up
  temp += K;
  // Correct by dividing by base
  return temp >> Q;
}



// 32-bit arrays: foo() reads a and b and writes c; red(), redL() and
// prod() read b.
unsigned int   a[1024];
unsigned int   b[1024];
unsigned int   c[1024];

// 64-bit counterparts. They are not referenced by any function in this
// test case as posted.
unsigned long long   al[1024];
unsigned long long   bl[1024];
unsigned long long   cl[1024];


// Element-wise fixed-point product: c[i] = mult(a[i], b[i]).
// The bound is 1024 to match the array extents; the posted version used
// 1204, which indexes past the end of a, b and c.
void foo() {
 for (int i=0;i!=1024;++i)
   c[i] = mult(a[i],b[i]);
}


// Reduction of b through mult(), starting from s = 1. This is the loop
// the report says is not vectorized.
// The bound is 1024 to match the extent of b; the posted version used
// 1204, which reads past the end of the array.
unsigned int red() {
  unsigned int s=1;
  for (int i=0;i!=1024;++i)
    s = mult(s,b[i]);
  return s;
}

// Same reduction as red(), but accumulating in unsigned long long via
// multL().
// The bound is 1024 to match the extent of b; the posted version used
// 1204, which reads past the end of the array.
// NOTE(review): this reads the 32-bit array b, not bl, exactly as posted;
// confirm whether bl was intended.
unsigned long long redL() {
  unsigned long long s=1;
  for (int i=0;i!=1024;++i)
    s = multL(s,b[i]);
  return s;
}


// Plain unsigned product reduction over b, for comparison with the
// fixed-point reductions.
// The bound is 1024 to match the extent of b; the posted version used
// 1204, which reads past the end of the array.
unsigned int prod() {
  unsigned int s=1;
  for (int i=0;i!=1024;++i)
    s = s*b[i];
  return s;
}

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]