Bug 54802 - Trivial code changes result in different assembly with respect to rotations and bswap.
Summary: Trivial code changes result in different assembly with respect to rotations and bswap.
Status: UNCONFIRMED
Alias: None
Product: gcc
Classification: Unclassified
Component: middle-end (show other bugs)
Version: 4.8.0
Importance: P3 enhancement
Target Milestone: ---
Assignee: Not yet assigned to anyone
URL:
Keywords: missed-optimization
Depends on:
Blocks:
 
Reported: 2012-10-03 23:18 UTC by Jason
Modified: 2012-10-11 14:21 UTC (History)
2 users (show)

See Also:
Host:
Target:
Build:
Known to work:
Known to fail:
Last reconfirmed:


Attachments
Code files (4.31 KB, application/x-tar)
2012-10-03 23:18 UTC, Jason
Details

Note: You need to log in before you can comment on or make changes to this bug.
Description Jason 2012-10-03 23:18:40 UTC
Created attachment 28347 [details]
Code files

In some C code, manually inlining constants changes whether gcc compiles the final swap stages to a rotation or to a bswap.  In particular, the following code

/*
 * Reverse the bit order of a 64-bit value by swapping progressively wider
 * groups: adjacent bits, bit pairs, nibbles, bytes, 16-bit words and finally
 * the two 32-bit halves.
 *
 * In this variant the last step is split into the named temporaries va and
 * vb before they are OR-ed together.  That split is the only difference from
 * reverse1, and it is what this report is about, so it must stay as written.
 */
uint64_t reverse0(uint64_t v) {
  /* Swap adjacent bits. */
  v = ((v >> 1) & 0x5555555555555555ULL) | ((v & 0x5555555555555555ULL) << 1);
  /* Swap adjacent 2-bit pairs. */
  v = ((v >> 2) & 0x3333333333333333ULL) | ((v & 0x3333333333333333ULL) << 2);
  /* Swap the two nibbles of every byte. */
  v = ((v >> 4) & 0x0F0F0F0F0F0F0F0FULL) | ((v & 0x0F0F0F0F0F0F0F0FULL) << 4);
  /* Swap adjacent bytes. */
  v = ((v >> 8) & 0x00FF00FF00FF00FFULL) | ((v & 0x00FF00FF00FF00FFULL) << 8);
  /* Swap adjacent 16-bit words. */
  v = ((v >> 16) & 0x0000FFFF0000FFFFULL) | ((v & 0x0000FFFF0000FFFFULL) << 16);
  /* Swap the 32-bit halves, with each half held in its own named constant. */
  const uint64_t
      va = ((v >> 32) & 0x00000000FFFFFFFFULL),
      vb = ((v & 0x00000000FFFFFFFFULL) << 32);
  v = va | vb;
  return v;
}

/*
 * Reverse the bit order of a 64-bit value by swapping progressively wider
 * groups: adjacent bits, bit pairs, nibbles, bytes, 16-bit words and finally
 * the two 32-bit halves.
 *
 * Every step, including the final 32-bit swap, is written as one expression.
 * That is the only difference from reverse0, and it is what this report is
 * about, so the final statement must stay in this form.
 */
uint64_t reverse1(uint64_t v) {
  /* Swap adjacent bits. */
  v = ((v >> 1) & 0x5555555555555555ULL) | ((v & 0x5555555555555555ULL) << 1);
  /* Swap adjacent 2-bit pairs. */
  v = ((v >> 2) & 0x3333333333333333ULL) | ((v & 0x3333333333333333ULL) << 2);
  /* Swap the two nibbles of every byte. */
  v = ((v >> 4) & 0x0F0F0F0F0F0F0F0FULL) | ((v & 0x0F0F0F0F0F0F0F0FULL) << 4);
  /* Swap adjacent bytes. */
  v = ((v >> 8) & 0x00FF00FF00FF00FFULL) | ((v & 0x00FF00FF00FF00FFULL) << 8);
  /* Swap adjacent 16-bit words. */
  v = ((v >> 16) & 0x0000FFFF0000FFFFULL) | ((v & 0x0000FFFF0000FFFFULL) << 16);
  /* Swap the 32-bit halves in a single expression (no named temporaries). */
  v = ((v >> 32) & 0x00000000FFFFFFFFULL) | ((v & 0x00000000FFFFFFFFULL) << 32);
  return v;
}

compiles to 

# reverse0 -- gcc output (x86-64, AT&T syntax) for the C function reverse0
# quoted earlier in this report.
#   In:      %rdi = v
#   Out:     %rax = result
#   Scratch: %rcx, %rdx
# Only the 1-, 2- and 4-bit swap stages are emitted as shift/mask code; the
# 8-, 16- and 32-bit stages of the C source show up as the single bswap at
# the end.
reverse0:
.LFB8:
	.cfi_startproc
	# Stage 1: swap adjacent bits.
	movq	%rdi, %rdx
	# mask 0x5555555555555555
	movabsq	$6148914691236517205, %rax
	# mask 0x3333333333333333, loaded ahead of stage 2
	movabsq	$3689348814741910323, %rcx
	shrq	%rdx
	andq	%rax, %rdx
	andq	%rdi, %rax
	# adding the register to itself is the "<< 1" of the C source
	addq	%rax, %rax
	orq	%rdx, %rax
	# Stage 2: swap adjacent 2-bit pairs using the mask in %rcx.
	movq	%rax, %rdx
	andq	%rcx, %rax
	shrq	$2, %rdx
	salq	$2, %rax
	andq	%rcx, %rdx
	# mask 0x0F0F0F0F0F0F0F0F for stage 3; %rcx is free once the stage-2
	# masking above is done
	movabsq	$1085102592571150095, %rcx
	orq	%rdx, %rax
	# Stage 3: swap the nibbles of every byte.
	movq	%rax, %rdx
	andq	%rcx, %rax
	shrq	$4, %rdx
	salq	$4, %rax
	andq	%rcx, %rdx
	orq	%rdx, %rax
	# Byte-reverse %rax: stands in for the 8-, 16- and 32-bit swap
	# statements of the C source.
	bswap	%rax
	ret
	.cfi_endproc
.LFE8:
	.size	reverse0, .-reverse0
	.p2align 4,,15
	.globl	reverse1
	.type	reverse1, @function
# reverse1 -- gcc output (x86-64, AT&T syntax) for the C function reverse1
# quoted earlier in this report.
#   In:      %rdi = v
#   Out:     %rax = result
#   Scratch: %rcx, %rdx
# The first three stages match reverse0 instruction for instruction.  Here
# the 8- and 16-bit swaps are also emitted as shift/mask code, and only the
# final 32-bit swap is collapsed, into a rotate rather than a bswap.
reverse1:
.LFB9:
	.cfi_startproc
	# Stage 1: swap adjacent bits.
	movq	%rdi, %rdx
	# mask 0x5555555555555555
	movabsq	$6148914691236517205, %rax
	# mask 0x3333333333333333, loaded ahead of stage 2
	movabsq	$3689348814741910323, %rcx
	shrq	%rdx
	andq	%rax, %rdx
	andq	%rdi, %rax
	# adding the register to itself is the "<< 1" of the C source
	addq	%rax, %rax
	orq	%rdx, %rax
	# Stage 2: swap adjacent 2-bit pairs using the mask in %rcx.
	movq	%rax, %rdx
	andq	%rcx, %rax
	shrq	$2, %rdx
	salq	$2, %rax
	andq	%rcx, %rdx
	# mask 0x0F0F0F0F0F0F0F0F for stage 3
	movabsq	$1085102592571150095, %rcx
	orq	%rdx, %rax
	# Stage 3: swap the nibbles of every byte.
	movq	%rax, %rdx
	andq	%rcx, %rax
	shrq	$4, %rdx
	salq	$4, %rax
	andq	%rcx, %rdx
	# mask 0x00FF00FF00FF00FF for stage 4
	movabsq	$71777214294589695, %rcx
	orq	%rdx, %rax
	# Stage 4: swap adjacent bytes (absent from reverse0's listing).
	movq	%rax, %rdx
	andq	%rcx, %rax
	shrq	$8, %rdx
	salq	$8, %rax
	andq	%rcx, %rdx
	# mask 0x0000FFFF0000FFFF for stage 5
	movabsq	$281470681808895, %rcx
	orq	%rdx, %rax
	# Stage 5: swap adjacent 16-bit words (absent from reverse0's listing).
	movq	%rax, %rdx
	andq	%rcx, %rax
	shrq	$16, %rdx
	salq	$16, %rax
	andq	%rcx, %rdx
	orq	%rdx, %rax
	# Rotating a 64-bit register by 32 exchanges its two 32-bit halves:
	# this is the last statement of the C source.
	rorq	$32, %rax
	ret
	.cfi_endproc
.LFE9:
	.size	reverse1, .-reverse1
	.p2align 4,,15
	.globl	reverse2
	.type	reverse2, @function


In the code where I use these functions, reverse0 is 30% faster than reverse1.  I don't think that manually inlining constants, each of which is used exactly once, should change the assembly code that gcc generates.

The relevant files (.c, .i, .s, and a log of the command line) are attached.