The crc32q instruction takes 64-bit operands, but ignores high 32 bits
of the destination operand, and zero-extends the result from 32 bits.
Let's model this in the RTL pattern to avoid zero-extension when the
_mm_crc32_u64 intrinsic is used with a 32-bit type.
PR target/106453
gcc/ChangeLog:
* config/i386/i386.md (sse4_2_crc32di): Model that only low 32
bits of operand 0 are consumed, and the result is zero-extended
to 64 bits.
gcc/testsuite/ChangeLog:
* gcc.target/i386/pr106453.c: New test.
(define_insn "sse4_2_crc32di"
[(set (match_operand:DI 0 "register_operand" "=r")
- (unspec:DI
- [(match_operand:DI 1 "register_operand" "0")
- (match_operand:DI 2 "nonimmediate_operand" "rm")]
- UNSPEC_CRC32))]
+ (zero_extend:DI
+ (unspec:SI
+ [(match_operand:SI 1 "register_operand" "0")
+ (match_operand:DI 2 "nonimmediate_operand" "rm")]
+ UNSPEC_CRC32)))]
"TARGET_64BIT && TARGET_CRC32"
"crc32{q}\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog1")
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mcrc32 -dp" } */
+/* { dg-final { scan-assembler-not "zero_extendsidi" } } */
+
+#include <immintrin.h>
+#include <stdint.h>
+
+uint32_t f(uint32_t c, uint64_t *p, size_t n)
+{
+ for (size_t i = 0; i < n; i++)
+ c = _mm_crc32_u64(c, p[i]);
+ return c;
+}