[Bug tree-optimization/106322] tree-vectorize: Wrong code at O2 level (-fno-tree-vectorize is working)
malat at debian dot org
gcc-bugzilla@gcc.gnu.org
Mon Aug 8 10:00:13 GMT 2022
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106322
--- Comment #18 from Mathieu Malaterre <malat at debian dot org> ---
Brushed-up example (with Makefile):
% more Makefile bytes.cc demo.cc
::::::::::::::
Makefile
::::::::::::::
# Build the reproducer at -O2, the optimization level named in the bug title.
CXXFLAGS := -O2

# Link both objects into `demo`; -lhwy links against the Highway library
# whose headers demo.cc includes.
# Recipe lines must start with a TAB character, otherwise make stops with
# "missing separator".
demo: demo.o bytes.o
	$(CXX) $(CXXFLAGS) -o $@ $^ -lhwy

# Remove the intermediate object files.
clean:
	rm -f bytes.o demo.o
::::::::::::::
bytes.cc
::::::::::::::
#include <cstring>
// Reports whether the first `size` bytes at `bytes1` and `bytes2` are
// identical. Defined in bytes.cc, a separate file from its caller in demo.cc.
bool BytesEqual2(const void *bytes1, const void *bytes2, const size_t size) {
  const int difference = std::memcmp(bytes1, bytes2, size);
  return !difference;
}
::::::::::::::
demo.cc
::::::::::::::
#include "hwy/aligned_allocator.h"
#include "hwy/highway.h"
#include <cstring>
bool BytesEqual2(const void *p1, const void *p2, const size_t size);
// Checks a two-lane uint16_t vector against an expected array and calls
// abort() on the first lane whose bytes differ.
//
//   d        - Highway tag object forwarded to Store().
//   expected - pointer to at least N (= 2) expected uint16_t lane values.
//   actual   - vector under test; passed to Store() together with `d`.
//
// NOTE(review): this function is part of a miscompilation reproducer; the
// exact statement shape and the #if toggle below matter, so do not refactor.
template <class D, class V>
void AssertVecEqual2(D d, const uint16_t *expected, const V &actual) {
// Number of lanes compared; fixed at 2 for this reproducer.
const size_t N = 2;
// Scratch buffer for N uint16_t values obtained from hwy::AllocateAligned;
// Store() is then called with it as the destination pointer (presumably
// writing the lanes of `actual` into it -- confirm against Highway docs).
auto actual_lanes = hwy::AllocateAligned<uint16_t>(N);
Store(actual, d, actual_lanes.get());
// View both arrays as raw bytes so each lane can be compared byte-wise.
const uint8_t *expected_array = reinterpret_cast<const uint8_t *>(expected);
const uint8_t *actual_array =
reinterpret_cast<const uint8_t *>(actual_lanes.get());
for (size_t i = 0; i < N; ++i) {
// Each lane occupies 2 bytes, hence the `i * 2` byte offset.
const uint8_t *expected_ptr = expected_array + i * 2;
const uint8_t *actual_ptr = actual_array + i * 2;
#if 1
// Per this report: comparing through BytesEqual2 (defined in bytes.cc)
// triggers the wrong-code bug when built with -O2.
if (!BytesEqual2(expected_ptr, actual_ptr, 2)) {
#else
// Per this report: calling std::memcmp directly here does not show the bug.
if (std::memcmp(expected_ptr, actual_ptr, 2) != 0) {
#endif
// A mismatch means the computed lanes differ from the expected ones.
abort();
}
}
}
// Reproducer entry point: loads two uint16_t lanes, computes MulHigh(v, v)
// and verifies the result via AssertVecEqual2, which aborts on a mismatch.
int main() {
// Tag for a 2-lane uint16_t vector in Highway's N_EMU128 namespace.
hwy::N_EMU128::FixedTag<uint16_t, 2> d;
const size_t N = 2;
// Input lanes live in a buffer obtained from hwy::AllocateAligned.
hwy::AlignedFreeUniquePtr<uint16_t[]> in_lanes =
hwy::AllocateAligned<uint16_t>(N);
uint16_t expected_lanes[2];
in_lanes[0] = 65535;
in_lanes[1] = 32767;
// Expected values equal the upper 16 bits of each input squared:
// 65535 * 65535 = 0xFFFE0001 -> 0xFFFE = 65534
// 32767 * 32767 = 0x3FFF0001 -> 0x3FFF = 16383
// (presumably what MulHigh returns per lane -- confirm against Highway docs).
expected_lanes[0] = 65534;
expected_lanes[1] = 16383;
hwy::N_EMU128::Vec128<uint16_t, 2> v = Load(d, in_lanes.get());
hwy::N_EMU128::Vec128<uint16_t, 2> actual = MulHigh(v, v);
// Calls abort() if any lane of `actual` differs from expected_lanes.
AssertVecEqual2(d, expected_lanes, actual);
}
More information about the Gcc-bugs
mailing list