extended asm - memory constraint

Ian Lance Taylor iant@google.com
Wed Jun 2 04:13:00 GMT 2010


mc2718 <mc2718@gmail.com> writes:

> The short code below highlights the problem. Instead of 10, the output is a
> random value (I compiled it with
> g++ -o tst -O3 -Wall -march=native -mtune=native tst.C). If you add "memory"
> to the clobber list, you get correct output.
>
>
> #include <iostream>
>
> using namespace std;
>
> struct int128 {
>
>   unsigned long long int lo;
>   unsigned long long int hi;
>
> } __attribute__ ((aligned(16))) ;
>
>
> int128 testFN(unsigned long long a) {
>   unsigned long long alo = (unsigned int) a;
>   unsigned long long ahi = (a >> 32);
>   int128 res;
>   asm __volatile__ (
>     // compute basic products (alo * alo, ahi * ahi, 2 * alo * ahi) 
>     "movq      (%0)  , %%xmm7 \n\t"   // xmm7 = (alo, 0)
>     "movhpd    (%1)  , %%xmm7 \n\t"   // xmm7 = (alo, ahi)
>     "movdqa  %%xmm7  , (%2)   \n\t"   // res.lo = alo, res.hi = ahi
>     : //"=&m" (res)
>     : "rV" (&alo), "rV" (&ahi), "rV" (&res)
>     : "%xmm7" //, "memory"
>   );
>   return  res;
> }

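This asm statement is wrong.  You are telling gcc that the asm code
needs the addresses of alo and ahi, but you aren't telling gcc that it
reads the values stored at those addresses.  As far as gcc can tell,
the values of alo and ahi are never used, so it never bothers to
initialize them at all.  In the same way, gcc is not told that the asm
writes to res through its address.  Adding the "memory" clobber works
because it tells gcc that the asm may read or write any memory, which
forces gcc to store alo and ahi before the asm and to reload res
afterward.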

This works:

// Splits `a` into a low part (the value truncated to unsigned int) and a
// high part (a >> 32), and returns them as (lo, hi) of an int128.  The pair
// is assembled in xmm7 and then stored to the result object.
//
// All three operands use "m" constraints, so gcc knows that the asm reads
// the values of both parts and writes the result object; no "memory"
// clobber is needed.
int128 testFN(unsigned long long a) {
  unsigned long long low_part = static_cast<unsigned int>(a);
  unsigned long long high_part = a >> 32;
  int128 packed;

  // Operand map: %0 = packed (output), %1 = low_part, %2 = high_part.
  __asm__ __volatile__ (
    "movq      %1  , %%xmm7 \n\t"   // xmm7 = (low_part, 0)
    "movhpd    %2  , %%xmm7 \n\t"   // xmm7 = (low_part, high_part)
    "movdqa  %%xmm7  , %0   \n\t"   // packed.lo = low_part, packed.hi = high_part
    : "=m" (packed)
    : "m" (low_part), "m" (high_part)
    : "%xmm7"
  );

  return packed;
}

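You might also consider using the intrinsic functions (for example,
_mm_set_epi64x and _mm_store_si128 from <emmintrin.h>) rather than
explicit asm statements; the compiler then tracks the memory accesses
and picks the registers for you.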

Ian



More information about the Gcc-help mailing list