This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: Use nonzero bits to refine range in split_constant_offset (PR 81635)
- From: Richard Biener <richard dot guenther at gmail dot com>
- To: Richard Biener <richard dot guenther at gmail dot com>, GCC Patches <gcc-patches at gcc dot gnu dot org>, Richard Sandiford <richard dot sandiford at linaro dot org>
- Date: Thu, 8 Feb 2018 16:06:35 +0100
- Subject: Re: Use nonzero bits to refine range in split_constant_offset (PR 81635)
- Authentication-results: sourceware.org; auth=none
- References: <87a7wra3ba.fsf@linaro.org> <CAFiYyc3BB7BhTSoUNPaFQXBk45=9ndJDkqKfwevHHnYJXmdqLg@mail.gmail.com> <87d11fy971.fsf@linaro.org>
On Thu, Feb 8, 2018 at 1:09 PM, Richard Sandiford
<richard.sandiford@linaro.org> wrote:
> Richard Biener <richard.guenther@gmail.com> writes:
>> On Fri, Feb 2, 2018 at 3:12 PM, Richard Sandiford
>> <richard.sandiford@linaro.org> wrote:
>>> Index: gcc/tree-data-ref.c
>>> ===================================================================
>>> --- gcc/tree-data-ref.c 2018-02-02 14:03:53.964530009 +0000
>>> +++ gcc/tree-data-ref.c 2018-02-02 14:03:54.184521826 +0000
>>> @@ -721,7 +721,13 @@ split_constant_offset_1 (tree type, tree
>>> if (TREE_CODE (tmp_var) != SSA_NAME)
>>> return false;
>>> wide_int var_min, var_max;
>>> - if (get_range_info (tmp_var, &var_min, &var_max) != VR_RANGE)
>>> + value_range_type vr_type = get_range_info (tmp_var, &var_min,
>>> + &var_max);
>>> + wide_int var_nonzero = get_nonzero_bits (tmp_var);
>>> + signop sgn = TYPE_SIGN (itype);
>>> + if (intersect_range_with_nonzero_bits (vr_type, &var_min,
>>> + &var_max, var_nonzero,
>>> + sgn) != VR_RANGE)
>>
>> Above it looks like we could go from VR_RANGE to VR_UNDEFINED.
>> I'm not sure whether the original range info might still be useful in
>> this case - if it might be, can we simply use only the range info when
>> it was VR_RANGE?
>
> I think we only drop to VR_UNDEFINED if we have contradictory
> information: nonzero bits says some bits must be clear, but the range
> only contains values for which the bits are set. In that case I think
> we should either be conservative and not use the information, or be
> aggressive and say that we have undefined behaviour, so overflow is OK.
>
> It seems a bit of a fudge to go back to the old range when we know it's
> false, and use it to allow the split sometimes and not others.
Fine.
> Thanks,
> Richard
>
>>
>> Ok otherwise.
>> Thanks,
>> Richard.
>>
>>> return false;
>>>
>>> /* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
>>> @@ -729,7 +735,6 @@ split_constant_offset_1 (tree type, tree
>>> operations done in ITYPE. The addition must overflow
>>> at both ends of the range or at neither. */
>>> bool overflow[2];
>>> - signop sgn = TYPE_SIGN (itype);
>>> unsigned int prec = TYPE_PRECISION (itype);
>>> wide_int woff = wi::to_wide (tmp_off, prec);
>>> wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
>>> Index: gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-3.c
>>> ===================================================================
>>> --- /dev/null 2018-02-02 09:03:36.168354735 +0000
>>> +++ gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-3.c 2018-02-02 14:03:54.183521863 +0000
>>> @@ -0,0 +1,62 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-additional-options "-fno-tree-loop-vectorize" } */
>>> +/* { dg-require-effective-target vect_double } */
>>> +/* { dg-require-effective-target lp64 } */
>>> +
>>> +void
>>> +f1 (double *p, double *q, unsigned int n)
>>> +{
>>> + p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
>>> + q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
>>> + for (unsigned int i = 0; i < n; i += 4)
>>> + {
>>> + double a = q[i] + p[i];
>>> + double b = q[i + 1] + p[i + 1];
>>> + q[i] = a;
>>> + q[i + 1] = b;
>>> + }
>>> +}
>>> +
>>> +void
>>> +f2 (double *p, double *q, unsigned int n)
>>> +{
>>> + p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
>>> + q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
>>> + for (unsigned int i = 0; i < n; i += 2)
>>> + {
>>> + double a = q[i] + p[i];
>>> + double b = q[i + 1] + p[i + 1];
>>> + q[i] = a;
>>> + q[i + 1] = b;
>>> + }
>>> +}
>>> +
>>> +void
>>> +f3 (double *p, double *q, unsigned int n)
>>> +{
>>> + p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
>>> + q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
>>> + for (unsigned int i = 0; i < n; i += 6)
>>> + {
>>> + double a = q[i] + p[i];
>>> + double b = q[i + 1] + p[i + 1];
>>> + q[i] = a;
>>> + q[i + 1] = b;
>>> + }
>>> +}
>>> +
>>> +void
>>> +f4 (double *p, double *q, unsigned int start, unsigned int n)
>>> +{
>>> + p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
>>> + q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
>>> + for (unsigned int i = start & -2; i < n; i += 2)
>>> + {
>>> + double a = q[i] + p[i];
>>> + double b = q[i + 1] + p[i + 1];
>>> + q[i] = a;
>>> + q[i + 1] = b;
>>> + }
>>> +}
>>> +
>>> +/* { dg-final { scan-tree-dump-times "basic block vectorized" 4 "slp1" } } */
>>> Index: gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-4.c
>>> ===================================================================
>>> --- /dev/null 2018-02-02 09:03:36.168354735 +0000
>>> +++ gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-4.c 2018-02-02 14:03:54.183521863 +0000
>>> @@ -0,0 +1,47 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-additional-options "-fno-tree-loop-vectorize" } */
>>> +/* { dg-require-effective-target lp64 } */
>>> +
>>> +void
>>> +f1 (double *p, double *q, unsigned int n)
>>> +{
>>> + p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
>>> + q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
>>> + for (unsigned int i = 0; i < n; i += 1)
>>> + {
>>> + double a = q[i] + p[i];
>>> + double b = q[i + 1] + p[i + 1];
>>> + q[i] = a;
>>> + q[i + 1] = b;
>>> + }
>>> +}
>>> +
>>> +void
>>> +f2 (double *p, double *q, unsigned int n)
>>> +{
>>> + p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
>>> + q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
>>> + for (unsigned int i = 0; i < n; i += 3)
>>> + {
>>> + double a = q[i] + p[i];
>>> + double b = q[i + 1] + p[i + 1];
>>> + q[i] = a;
>>> + q[i + 1] = b;
>>> + }
>>> +}
>>> +
>>> +void
>>> +f3 (double *p, double *q, unsigned int start, unsigned int n)
>>> +{
>>> + p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
>>> + q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
>>> + for (unsigned int i = start; i < n; i += 2)
>>> + {
>>> + double a = q[i] + p[i];
>>> + double b = q[i + 1] + p[i + 1];
>>> + q[i] = a;
>>> + q[i + 1] = b;
>>> + }
>>> +}
>>> +
>>> +/* { dg-final { scan-tree-dump-not "basic block vectorized" "slp1" } } */