how to correctly pass volatile pointer to _mm_loadu_ps?

Paweł Sikora pluto@agmk.net
Sun Jan 23 14:36:00 GMT 2011


hi,

i'm trying to read 128 bits of unaligned data in one atomic move using the 'movups' opcode.
the prototype with _mm_loadu_ps was optimized out by the compiler.

$ gcc46 hw_reg.c -Wall -c -O2 -m64 --save-temps
hw_reg.c: In function 'read_mapped_register_128':
hw_reg.c:19:2: warning: passing argument 1 of '_mm_loadu_ps' from incompatible pointer type [enabled by default]
/opt/gcc46/lib/gcc/x86_64-unknown-linux-gnu/4.6.0/include/xmmintrin.h:904:1: note: expected 'const float *' but argument is of type 'const volatile __vector(4) float *'

i've tested another idea with an intermediate volatile value:

/*
 * Second attempt: read the 128-bit element address[index] with
 * _mm_loadu_ps() and pass the result through a volatile local, apparently
 * so that the compiler cannot discard the load.
 *
 * NOTE(review): _mm_loadu_ps() is declared to take 'const float *' (see the
 * compiler note quoted above), so passing p here is still an incompatible
 * pointer type and the volatile qualifier is not seen by the intrinsic.
 */
static __m128 read_mapped_register_128( __m128 volatile const* address, ptrdiff_t index )
{
        /* pointer arithmetic in __m128 units: index is scaled by 16 bytes */
        __m128 volatile const* p = address + index;
        /* v is a volatile object, so it is written to the stack and read
           back; this is the movaps store/reload pair in the listing below */
        __m128 volatile const v = _mm_loadu_ps( p );
        return v;
}

but it generates 3 moves while one is enough:

# Code emitted for the volatile-temporary variant shown above (AT&T syntax).
# NOTE(review): presumably %rdi = address and %rsi = index (SysV AMD64
# argument order); the 128-bit result is left in %xmm0.
unused_read_128_with_side_effects:
        salq    $4, %rsi                # index *= 16: byte offset of the 128-bit element
        movups  (%rdi,%rsi), %xmm0      # move 1: the unaligned 16-byte load that is actually wanted
        movaps  %xmm0, -24(%rsp)        # move 2: store into the volatile local below %rsp
        movaps  -24(%rsp), %xmm0        # move 3: reload the volatile local as the return value
        ret

is there a nice solution in C language for such (one-move) access?

BR,
Pawel.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: hw_reg.c
Type: text/x-csrc
Size: 926 bytes
Desc: not available
URL: <https://gcc.gnu.org/pipermail/gcc-help/attachments/20110123/a75ac4bf/attachment.bin>
-------------- next part --------------
	.file	"hw_reg.c"
	.text
	# unused_read_32_with_side_effects
	# Loads the 32-bit value at %rdi + 4*%rsi and leaves it in %eax.
	# NOTE(review): the C source (hw_reg.c) is not part of this listing;
	# %rdi/%rsi are presumably the (address, index) arguments under the
	# SysV AMD64 calling convention.
	.p2align 4,,15
	.globl	unused_read_32_with_side_effects
	.type	unused_read_32_with_side_effects, @function
unused_read_32_with_side_effects:
.LFB519:
	.cfi_startproc
	leaq	(%rdi,%rsi,4), %rax	# rax = address of the selected 4-byte element
	movl	(%rax), %eax	# the 32-bit load is emitted here
	ret
	.cfi_endproc
.LFE519:
	.size	unused_read_32_with_side_effects, .-unused_read_32_with_side_effects
	# unused_read_64_with_side_effects
	# Loads the 64-bit value at %rdi + 8*%rsi and leaves it in %rax.
	# NOTE(review): the C source (hw_reg.c) is not part of this listing;
	# %rdi/%rsi are presumably the (address, index) arguments under the
	# SysV AMD64 calling convention.
	.p2align 4,,15
	.globl	unused_read_64_with_side_effects
	.type	unused_read_64_with_side_effects, @function
unused_read_64_with_side_effects:
.LFB520:
	.cfi_startproc
	leaq	(%rdi,%rsi,8), %rax	# rax = address of the selected 8-byte element
	movq	(%rax), %rax	# the 64-bit load is emitted here
	ret
	.cfi_endproc
.LFE520:
	.size	unused_read_64_with_side_effects, .-unused_read_64_with_side_effects
	# unused_read_128_with_side_effects
	# As compiled here the body consists only of a return: no movups (or any
	# other memory access) is emitted, unlike the 32- and 64-bit functions
	# earlier in this listing. This is the "optimized out" load the message
	# is asking about.
	.p2align 4,,15
	.globl	unused_read_128_with_side_effects
	.type	unused_read_128_with_side_effects, @function
unused_read_128_with_side_effects:
.LFB521:
	.cfi_startproc
	rep	# prefix applied to the ret on the next line
	ret	# NOTE(review): "rep ret" is presumably GCC's two-byte return form; the 128-bit read is gone
	.cfi_endproc
.LFE521:
	.size	unused_read_128_with_side_effects, .-unused_read_128_with_side_effects
	.ident	"GCC: (GNU) 4.6.0 20110122 (experimental)"
	.section	.note.GNU-stack,"",@progbits


More information about the Gcc-help mailing list