[PATCH] expand: Add new clrsb fallback expansion [PR101950]

Richard Biener rguenther@suse.de
Thu Aug 19 08:49:02 GMT 2021


On Thu, 19 Aug 2021, Jakub Jelinek wrote:

> Hi!
> 
> As suggested in the PR, the following patch adds two new clrsb
> expansion possibilities if the target doesn't have clrsb_optab for
> the requested mode or any wider mode, but does have clz_optab for
> the requested mode.
> One expansion is
> clrsb (op0)
> expands as
> clz (op0 ^ (((stype)op0) >> (prec-1))) - 1
> which is usable if CLZ_DEFINED_VALUE_AT_ZERO is 2 and the value it
> defines for a zero argument is prec, because the clz argument can be
> 0 and clrsb should give prec-1 in that case.
> The other expansion is
> clz (((op0 << 1) ^ (((stype)op0) >> (prec-1))) | 1)
> where the clz argument is never 0, but it is one operation longer.
> E.g. on x86_64-linux with -O2 -mno-lzcnt, this results for
> int foo (int x) { return __builtin_clrsb (x); }
> in
> -	subq	$8, %rsp
> -	movslq	%edi, %rdi
> -	call	__clrsbdi2
> -	addq	$8, %rsp
> -	subl	$32, %eax
> +	leal	(%rdi,%rdi), %eax
> +	sarl	$31, %edi
> +	xorl	%edi, %eax
> +	orl	$1, %eax
> +	bsrl	%eax, %eax
> +	xorl	$31, %eax
> and with -O2 -mlzcnt:
> +	movl	%edi, %eax
> +	sarl	$31, %eax
> +	xorl	%edi, %eax
> +	lzcntl	%eax, %eax
> +	subl	$1, %eax
> On armv7hl-linux-gnueabi with -O2:
> -	push	{r4, lr}
> -	bl	__clrsbsi2
> -	pop	{r4, pc}
> +	@ link register save eliminated.
> +	eor	r0, r0, r0, asr #31
> +	clz	r0, r0
> +	sub	r0, r0, #1
> +	bx	lr
> As it will (at least usually) make the code larger, it is
> disabled when optimizing for size (-Os or cold instructions).
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Richard.

> 2021-08-19  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR middle-end/101950
> 	* optabs.c (expand_clrsb_using_clz): New function.
> 	(expand_unop): Use it as another clrsb expansion fallback.
> 
> 	* gcc.target/i386/pr101950-1.c: New test.
> 	* gcc.target/i386/pr101950-2.c: New test.
> 
> --- gcc/optabs.c.jj	2021-07-15 10:16:13.027581160 +0200
> +++ gcc/optabs.c	2021-08-18 13:36:56.410818265 +0200
> @@ -2600,6 +2600,82 @@ widen_leading (scalar_int_mode mode, rtx
>    return 0;
>  }
>  
> +/* Attempt to emit (clrsb:mode op0) as
> +   (plus:mode (clz:mode (xor:mode op0 (ashr:mode op0 (const_int prec-1))))
> +	      (const_int -1))
> +   if CLZ_DEFINED_VALUE_AT_ZERO (mode, val) is 2 and val is prec,
> +   or as
> +   (clz:mode (ior:mode (xor:mode (ashl:mode op0 (const_int 1))
> +				 (ashr:mode op0 (const_int prec-1)))
> +		       (const_int 1)))
> +   otherwise.  */
> +
> +static rtx
> +expand_clrsb_using_clz (scalar_int_mode mode, rtx op0, rtx target)
> +{
> +  if (optimize_insn_for_size_p ()
> +      || optab_handler (clz_optab, mode) == CODE_FOR_nothing)
> +    return NULL_RTX;
> +
> +  start_sequence ();
> +  HOST_WIDE_INT val = 0;
> +  if (CLZ_DEFINED_VALUE_AT_ZERO (mode, val) != 2
> +      || val != GET_MODE_PRECISION (mode))
> +    val = 0;
> +  else
> +    val = 1;
> +
> +  rtx temp2 = op0;
> +  if (!val)
> +    {
> +      temp2 = expand_binop (mode, ashl_optab, op0, const1_rtx,
> +			    NULL_RTX, 0, OPTAB_DIRECT);
> +      if (!temp2)
> +	{
> +	fail:
> +	  end_sequence ();
> +	  return NULL_RTX;
> +	}
> +    }
> +
> +  rtx temp = expand_binop (mode, ashr_optab, op0,
> +			   GEN_INT (GET_MODE_PRECISION (mode) - 1),
> +			   NULL_RTX, 0, OPTAB_DIRECT);
> +  if (!temp)
> +    goto fail;
> +
> +  temp = expand_binop (mode, xor_optab, temp2, temp, NULL_RTX, 0,
> +		       OPTAB_DIRECT);
> +  if (!temp)
> +    goto fail;
> +
> +  if (!val)
> +    {
> +      temp = expand_binop (mode, ior_optab, temp, const1_rtx,
> +			   NULL_RTX, 0, OPTAB_DIRECT);
> +      if (!temp)
> +	goto fail;
> +    }
> +  temp = expand_unop_direct (mode, clz_optab, temp, val ? NULL_RTX : target,
> +			     true);
> +  if (!temp)
> +    goto fail;
> +  if (val)
> +    {
> +      temp = expand_binop (mode, add_optab, temp, constm1_rtx,
> +			   target, 0, OPTAB_DIRECT);
> +      if (!temp)
> +	goto fail;
> +    }
> +
> +  rtx_insn *seq = get_insns ();
> +  end_sequence ();
> +
> +  add_equal_note (seq, temp, CLRSB, op0, NULL_RTX, mode);
> +  emit_insn (seq);
> +  return temp;
> +}
> +
>  /* Try calculating clz of a double-word quantity as two clz's of word-sized
>     quantities, choosing which based on whether the high word is nonzero.  */
>  static rtx
> @@ -3171,6 +3247,9 @@ expand_unop (machine_mode mode, optab un
>  	  temp = widen_leading (int_mode, op0, target, unoptab);
>  	  if (temp)
>  	    return temp;
> +	  temp = expand_clrsb_using_clz (int_mode, op0, target);
> +	  if (temp)
> +	    return temp;
>  	}
>        goto try_libcall;
>      }
> --- gcc/testsuite/gcc.target/i386/pr101950-1.c.jj	2021-08-18 13:58:05.363093681 +0200
> +++ gcc/testsuite/gcc.target/i386/pr101950-1.c	2021-08-18 14:01:22.905335834 +0200
> @@ -0,0 +1,20 @@
> +/* PR middle-end/101950 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mno-lzcnt" } */
> +/* { dg-final { scan-assembler-not "call\[^\n\r]*__clrsb.i2" } } */
> +/* { dg-final { scan-assembler-times "\tbsr\[ql]\t" 2 } } */
> +/* { dg-final { scan-assembler-times "\txor\[ql]\t" 4 } } */
> +/* { dg-final { scan-assembler-times "\tor\[ql]\t" 2 } } */
> +/* { dg-final { scan-assembler-times "\tsar\[ql]\t|\tcltd" 2 } } */
> +
> +int
> +foo (long x)
> +{
> +  return __builtin_clrsbl (x);
> +}
> +
> +int
> +bar (int x)
> +{
> +  return __builtin_clrsb (x);
> +}
> --- gcc/testsuite/gcc.target/i386/pr101950-2.c.jj	2021-08-18 13:58:11.367009865 +0200
> +++ gcc/testsuite/gcc.target/i386/pr101950-2.c	2021-08-18 14:01:18.540396835 +0200
> @@ -0,0 +1,19 @@
> +/* PR middle-end/101950 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mlzcnt" } */
> +/* { dg-final { scan-assembler-not "call\[^\n\r]*__clrsb.i2" } } */
> +/* { dg-final { scan-assembler-times "\tlzcnt\[ql]\t" 2 } } */
> +/* { dg-final { scan-assembler-times "\txor\[ql]\t" 2 } } */
> +/* { dg-final { scan-assembler-times "\tsar\[ql]\t|\tcltd" 2 } } */
> +
> +int
> +foo (long x)
> +{
> +  return __builtin_clrsbl (x);
> +}
> +
> +int
> +bar (int x)
> +{
> +  return __builtin_clrsb (x);
> +}
> 
> 	Jakub
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)


More information about the Gcc-patches mailing list