[PATCH] RISC-V: Enable overlap-by-pieces in case of fast unaligned access

Vineet Gupta vineetg@rivosinc.com
Tue Nov 2 19:27:18 GMT 2021


On 7/22/21 6:29 AM, Kito Cheng via Gcc-patches wrote:
> Could you add a testcase? Otherwise LGTM.
> 
> Option: -O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64
> void foo(char *dst){
>     __builtin_memset(dst, 0, 15);
> }
> 
> On Thu, Jul 22, 2021 at 8:53 PM Christoph Muellner via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
>>
>> This patch enables the overlap-by-pieces feature of the by-pieces
>> infrastructure for inlining builtins in case the target has set
>> riscv_slow_unaligned_access_p to false.
>>
>> To demonstrate the effect for targets with fast unaligned access,
>> the following code sequences are generated for a 15-byte memset-zero.
>>
>> Without overlap_op_by_pieces we get:
>>    8e:   00053023                sd      zero,0(a0)
>>    92:   00052423                sw      zero,8(a0)
>>    96:   00051623                sh      zero,12(a0)
>>    9a:   00050723                sb      zero,14(a0)

What do I need to do to generate even the non-optimized code above with
gcc 11 [1][2]? Despite passing -mno-strict-align and trying
-mtune={rocket, sifive-7-series}, I only get the fully unrolled version:

foo:
# memcpy-15.c:2:    __builtin_memset(dst, 0, 15);
	sb	zero,0(a0)	#, MEM <char[1:15]> [(void *)dst_2(D)]
	sb	zero,1(a0)	#, MEM <char[1:15]> [(void *)dst_2(D)]
	sb	zero,2(a0)	#, MEM <char[1:15]> [(void *)dst_2(D)]
	sb	zero,3(a0)	#, MEM <char[1:15]> [(void *)dst_2(D)]
	sb	zero,4(a0)	#, MEM <char[1:15]> [(void *)dst_2(D)]
	sb	zero,5(a0)	#, MEM <char[1:15]> [(void *)dst_2(D)]
	sb	zero,6(a0)	#, MEM <char[1:15]> [(void *)dst_2(D)]
	sb	zero,7(a0)	#, MEM <char[1:15]> [(void *)dst_2(D)]
	sb	zero,8(a0)	#, MEM <char[1:15]> [(void *)dst_2(D)]
	sb	zero,9(a0)	#, MEM <char[1:15]> [(void *)dst_2(D)]
	sb	zero,10(a0)	#, MEM <char[1:15]> [(void *)dst_2(D)]
	sb	zero,11(a0)	#, MEM <char[1:15]> [(void *)dst_2(D)]
	sb	zero,12(a0)	#, MEM <char[1:15]> [(void *)dst_2(D)]
	sb	zero,13(a0)	#, MEM <char[1:15]> [(void *)dst_2(D)]
	sb	zero,14(a0)	#, MEM <char[1:15]> [(void *)dst_2(D)]
	ret	
	.size	foo, .-foo
	.ident	"GCC: (GNU) 11.1.0"

[1] https://gcc.gnu.org/pipermail/gcc-patches/2021-October/581858.html
[2] https://github.com/kito-cheng/riscv-gcc/tree/riscv-gcc-11.1.0-zbabcs

Thx,
-Vineet

>>
>> With overlap_op_by_pieces we get:
>>    7e:   00053023                sd      zero,0(a0)
>>    82:   000533a3                sd      zero,7(a0)
>>
>> gcc/ChangeLog:
>>
>>          * config/riscv/riscv.c (riscv_overlap_op_by_pieces): New function.
>>          (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
>>          riscv_overlap_op_by_pieces.
>>
>> Signed-off-by: Christoph Muellner <cmuellner@gcc.gnu.org>
>> ---
>>   gcc/config/riscv/riscv.c | 11 +++++++++++
>>   1 file changed, 11 insertions(+)
>>
>> diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
>> index 576960bb37c..98c76ba657a 100644
>> --- a/gcc/config/riscv/riscv.c
>> +++ b/gcc/config/riscv/riscv.c
>> @@ -5201,6 +5201,14 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)
>>     return riscv_slow_unaligned_access_p;
>>   }
>>
>> +/* Implement TARGET_OVERLAP_OP_BY_PIECES_P.  */
>> +
>> +static bool
>> +riscv_overlap_op_by_pieces (void)
>> +{
>> +  return !riscv_slow_unaligned_access_p;
>> +}
>> +
>>   /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
>>
>>   static bool
>> @@ -5525,6 +5533,9 @@ riscv_asan_shadow_offset (void)
>>   #undef TARGET_SLOW_UNALIGNED_ACCESS
>>   #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
>>
>> +#undef TARGET_OVERLAP_OP_BY_PIECES_P
>> +#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
>> +
>>   #undef TARGET_SECONDARY_MEMORY_NEEDED
>>   #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
>>
>> --
>> 2.31.1
>>
> 



More information about the Gcc-patches mailing list