This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
Re: [RFC] optabs and tree-codes for vector operations
Richard Henderson <rth@redhat.com> writes:
> I know we've got some clever bit manipulation things to implement a
> vector ADD_EXPR without an actual vector addition instruction; I
> don't know if there's similar cleverness that can implement
> add-with-saturation.
Here are some. They seem to be worth bothering with only when there are
at least 8 subwords, though. Insn/cycle counts are for Alpha.
// unsigned saturated add, saturate to 0xff
// 14 insns, ~8 cycles (without constant construction)
// Lane-wise unsigned saturating add of eight packed 8-bit values.
//
// x, y: eight unsigned bytes each, packed into one 64-bit word.
// Returns the eight per-byte sums, each clamped to 0xff on overflow.
//
// Declared static: under C99/C11 rules a plain `inline` definition emits no
// external definition, so any call the compiler chose not to inline would
// fail to link.
static inline uint64_t usaddv8qi(uint64_t x, uint64_t y) {
    const uint64_t signmask = 0x8080808080808080ULL;  // top bit of every byte
    uint64_t t0, t1;

    t0 = (y ^ x) & signmask;  // lanes where exactly one operand has its top bit set
    t1 = (y & x) & signmask;  // lanes where both do: these always overflow

    // Add only the low 7 bits of each lane, so a lane's carry lands in its
    // own bit 7 and never spills into the neighbouring lane.
    x &= ~signmask;
    y &= ~signmask;
    x += y;

    t1 |= t0 & x;             // one top bit set plus a carry into bit 7 also overflows

    // Widen each overflow flag (bit 7) into a full 0xff byte: 0x100 - 0x01
    // per lane. The top lane's shifted-out bit is handled by unsigned
    // wraparound of the subtraction.
    t1 = (t1 << 1) - (t1 >> 7);

    // x ^ t0 rebuilds the real bit 7 (carry ^ x7 ^ y7); the OR forces every
    // overflowed lane to 0xff.
    return (x ^ t0) | t1;
}
// unsigned saturated sub, saturate to 0x00
// 14 insns, 7 cycles (without constant construction)
// Lane-wise unsigned saturating subtract (x - y) of eight packed 8-bit values.
//
// x, y: eight unsigned bytes each, packed into one 64-bit word.
// Returns the eight per-byte differences, each clamped to 0x00 when the
// lane of y exceeds the lane of x.
//
// Declared static: under C99/C11 rules a plain `inline` definition emits no
// external definition, so any call the compiler chose not to inline would
// fail to link.
static inline uint64_t ussubv8qi(uint64_t x, uint64_t y) {
    const uint64_t signmask = 0x8080808080808080ULL;  // top bit of every byte
    uint64_t t0, t1;

    t0 = (y ^ ~x) & signmask;  // lanes where both operands have the same top bit
    t1 = (y & ~x) & signmask;  // y's top bit set, x's clear: these always underflow

    // Force bit 7 of every x lane on and clear it in y, so each lane's
    // subtraction borrows from its own bit 7 and never from the next lane.
    x |= signmask;
    y &= ~signmask;
    x -= y;

    t1 |= t0 & ~x;             // equal top bits and bit 7 consumed by a borrow: underflow

    // Widen each underflow flag (bit 7) into a full 0xff byte: 0x100 - 0x01
    // per lane. The top lane's shifted-out bit is handled by unsigned
    // wraparound of the subtraction.
    t1 = (t1 << 1) - (t1 >> 7);

    // x ^ t0 rebuilds the real bit 7 of the difference; the AND clears every
    // underflowed lane to 0x00.
    return (x ^ t0) & ~t1;
}
// signed saturated add, saturate to 0x80 or 0x7f
// 16 insns, 8 cycles
// Lane-wise signed saturating add of eight packed 8-bit two's-complement
// values. Each lane's sum is clamped to 0x7f on positive overflow and to
// 0x80 on negative overflow.
uint64_t ssaddv8qi(uint64_t x, uint64_t y)
{
    const uint64_t msb = 0x8080808080808080ULL;  // sign bit of every lane
    const uint64_t low7 = ~msb;                  // the remaining 7 bits per lane

    // Only lanes whose operands share a sign can overflow.
    const uint64_t same_sign = ~(x ^ y) & msb;

    // Add the low 7 bits per lane; the carry lands in the lane's own bit 7
    // and never crosses into the neighbouring lane.
    uint64_t sum = (x & low7) + (y & low7);

    // Overflow: operands share a sign and the carry into bit 7 differs
    // from that sign.
    const uint64_t ovf = (sum ^ y) & same_sign;
    const uint64_t ovf_lsb = ovf >> 7;                // flag moved to bit 0 of its lane
    const uint64_t ovf_bytes = (ovf << 1) - ovf_lsb;  // 0xff in every overflowed lane

    // After this, bit 7 of each lane holds the complement of the true
    // result sign.
    sum ^= same_sign;

    // Normal lanes: flip bit 7 back to obtain the real sum.
    // Overflowed lanes: start from 0x80.
    const uint64_t base = (sum & ~ovf_bytes) ^ msb;

    // Positive overflow leaves bit 7 of `sum` clear; subtract 1 in those
    // lanes to turn 0x80 into 0x7f.
    const uint64_t dec = ovf_lsb & ~(sum >> 7);

    return base - dec;
}
--
Falk