Bug 93258 - [10 regression] Missed constant folding from constructor
Summary: [10 regression] Missed constant folding from constructor
Status: RESOLVED WORKSFORME
Alias: None
Product: gcc
Classification: Unclassified
Component: tree-optimization (show other bugs)
Version: 10.0
: P3 normal
Target Milestone: 10.0
Assignee: Not yet assigned to anyone
URL:
Keywords: missed-optimization
Depends on:
Blocks:
 
Reported: 2020-01-14 09:21 UTC by Jan Hubicka
Modified: 2020-01-17 12:46 UTC (History)
3 users (show)

See Also:
Host:
Target:
Build:
Known to work:
Known to fail:
Last reconfirmed: 2020-01-14 00:00:00


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description Jan Hubicka 2020-01-14 09:21:13 UTC
In the testcase (reduced from folly by Mark Williams):

typedef int a;
template <typename b, b c> struct d { static constexpr b e = c; };
template <bool c> using f = d<bool, c>;
template <typename...> struct g;
template <typename h, typename i> struct g<h, i> : h {};
template <typename> struct j : f<!bool()> {};
template <bool> struct an;
class m {
  struct n {};

public:
  using bm = an<g<j<int>, int>::e>;
  using bn = n;
  template <typename bo> m(bn, bo);
};
class o {
  m bq;
  using bn = m::bn;
  bn k;
  o(o &&) : bq(k, 0) {}
};
namespace bw {
template <class bx> class by {
public:
  constexpr by(bx ca, a cb) : cc(ca), cd(ca + cb) {}
  long cb() { return cd - cc; }
  bx cc, cd;
};
typedef by<const char *> ch;
class ci {
public:
  static o cj(void *, long);
  static o cj(by<const char *> p) {
    long l = p.cb();
    cj(0, l);
  }
};
}
constexpr bw::ch ck{"", 6};
auto fn1() { bw::ci::cj(ck); }

compiled with -O3 both GCC 9 and 10 arrive to the following code before fre3:

;; Function fn1 (_Z3fn1v, funcdef_no=9, decl_uid=2560, cgraph_uid=8, symbol_order=8) (executed once)
                                                                                
fn1 ()                                                                          
{                                                                               
  struct o D.2587;                                                              
  struct by p;                                                                  
  const char * _3;                                                              
  const char * _4;                                                              
  long int _5;                                                                  
                                                                                
  <bb 2> [local count: 1073741824]:                                             
  p = ck;                                                                       
  _3 = MEM[(const char * *)&p];                                                 
  _4 = MEM[(const char * *)&p + 8B];                                            
  _5 = _4 - _3;                                                                 
  D.2587 = bw::ci::cj (0B, _5); [return slot optimization]                      
  D.2587 ={v} {CLOBBER};                                                        
  __builtin_unreachable ();                                                     
                                                                                
}                                                                               

Now GCC9 optimizes in fre3 to:
fn1 ()
{
  struct o D.2585;
  struct by p;

  <bb 2> [local count: 1073741824]:
  p = ck;
  D.2585 = bw::ci::cj (0B, 6); [return slot optimization]
  D.2585 ={v} {CLOBBER};
  __builtin_unreachable ();

}


and GCC10:
fn1 ()
{
  struct o D.2587;
  struct by p;
  const char * _4;
  long int _5;

  <bb 2> [local count: 1073741824]:
  p = ck;
  _4 = MEM[(const char * *)&p + 8B];
  _5 = _4 - "";
  D.2587 = bw::ci::cj (0B, _5); [return slot optimization]
  D.2587 ={v} {CLOBBER};
  __builtin_unreachable ();

}

This seems to be caused by fact that the constructor gets is not formed as valid min invariant:
(gdb) p debug_generic_stmt (res)
(const char *) "" + 6;
$19 = void
(gdb) n
1487                      if (is_gimple_min_invariant (res))
(gdb)
1501      return NULL_TREE;

Is this missed gimplification? GCC9 gets it as &MEM[(void *)"" + 6B]

Mark bisected it to the following:
                                                                                                                                                                               
commit 7a429a9d52a77e4db2eac6d23d8edb69d26d4f9b (HEAD, refs/bisect/good-7a429a9d52a77e4db2eac6d23d8edb69d26d4f9b)                                                              
Author:     jason <jason@138bc75d-0d04-0410-961f-82ee72b054a4>                                                                                                                 
AuthorDate: Mon Jun 10 19:31:49 2019 +0000                                                                                                                                     
Commit:     jason <jason@138bc75d-0d04-0410-961f-82ee72b054a4>                                                                                                                 
CommitDate: Mon Jun 10 19:31:49 2019 +0000                                                                                                                                     
                                                                                                                                                                               
            Reduce constexpr_call memory consumption.                                                                                                                          
                                                                                                                                                                               
            * constexpr.c (cxx_bind_parameters_in_call): Use TREE_VEC rather                                                                                                   
            than TREE_LIST.                                                                                                                                                    
            (constexpr_call_hasher::equal, cxx_bind_parameters_in_call)                                                                                                        
            (cxx_eval_call_expression): Adjust.                                                                                                                                
                                                                                                                                                                               
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@272125 138bc75d-0d04-0410-961f-82ee72b054a4
Comment 1 Richard Biener 2020-01-14 12:59:10 UTC
Global CTORs are not gimplified at all, we have to work hard in canonicalize_constructor_val to make it "valid".  Like

  /* In CONSTRUCTORs we may see unfolded constants like (int (*) ()) 0.  */
  if (TREE_CODE (cval) == INTEGER_CST)
    {
      if (TREE_OVERFLOW_P (cval))
        cval = drop_tree_overflow (cval);
      if (!useless_type_conversion_p (TREE_TYPE (orig_cval), TREE_TYPE (cval)))
        cval = fold_convert (TREE_TYPE (orig_cval), cval);
      return cval;
    }

(const char *) "" + 6; looks like it should be handled by

  if (TREE_CODE (cval) == POINTER_PLUS_EXPR
      && TREE_CODE (TREE_OPERAND (cval, 1)) == INTEGER_CST)
    {
      tree ptr = TREE_OPERAND (cval, 0);
      if (is_gimple_min_invariant (ptr))
        cval = build1_loc (EXPR_LOCATION (cval),
                           ADDR_EXPR, TREE_TYPE (ptr),
                           fold_build2 (MEM_REF, TREE_TYPE (TREE_TYPE (ptr)),
                                        ptr,
                                        fold_convert (ptr_type_node,
                                                      TREE_OPERAND (cval, 1))));
    }

but we need another STRIP_NOPS before the is_gimple_min_invariant (ptr) check
on ptr I guess?

Btw, on trunk I see it optimized, maybe some recent CTOR "fixing" fixed it
again?  Can you double-check?

If it's fixed again I suggest to add the testcase (ck should be elided
in the assembly?)
Comment 2 Martin Liška 2020-01-14 13:53:56 UTC
> Btw, on trunk I see it optimized, maybe some recent CTOR "fixing" fixed it
> again?  Can you double-check?
> 
> If it's fixed again I suggest to add the testcase (ck should be elided
> in the assembly?)

I can confirm it's not an issue:

gcc-bisect.py bisect 'g++ pr93258.cc -O3 -Wno-return-type -c -fdump-tree-optimized=/dev/stdout | grep ", 6)"'
Releases
  7.1.0 (303f81ad7e9f278d)(02 May 2017 12:42): [took: 2.503s] result: OK
  D.2542 = bw::ci::cj (0B, 6); [return slot optimization]
  7.2.0 (586a0829dc38626c)(14 Aug 2017 07:59): [took: 2.480s] result: OK
  D.2542 = bw::ci::cj (0B, 6); [return slot optimization]
  7.3.0 (6184085fc664265d)(25 Jan 2018 08:17): [took: 2.520s] result: OK
  D.2542 = bw::ci::cj (0B, 6); [return slot optimization]
  7.4.0 (adafdb1e7212d53a)(06 Dec 2018 10:00): [took: 2.439s] result: OK
  D.2542 = bw::ci::cj (0B, 6); [return slot optimization]
  7.5.0 (b2d961e7342b5ba4)(14 Nov 2019 07:40): [took: 2.347s] result: OK
  D.2542 = bw::ci::cj (0B, 6); [return slot optimization]
  8.1.0 (406c2abec3f998e9)(02 May 2018 10:13): [took: 2.623s] result: OK
  D.2637 = bw::ci::cj (0B, 6); [return slot optimization]
  8.2.0 (ddeb81e76461fc00)(26 Jul 2018 09:47): [took: 2.563s] result: OK
  D.2637 = bw::ci::cj (0B, 6); [return slot optimization]
  8.3.0 (4c44b708f11eec6f)(22 Feb 2019 14:20): [took: 2.659s] result: OK
  D.2641 = bw::ci::cj (0B, 6); [return slot optimization]
  9.1.0 (c8913260b0756f97)(03 May 2019 07:59): [took: 2.714s] result: OK
  D.2585 = bw::ci::cj (0B, 6); [return slot optimization]
  9.2.0 (a0c06cc27d2146b7)(12 Aug 2019 09:38): [took: 2.714s] result: OK
  D.2585 = bw::ci::cj (0B, 6); [return slot optimization]

  3d77686d2eddf76d(13 Jan 2020 16:39)(joseph@codesourcery.com): [took: 3.305s] result: OK
  D.2606 = bw::ci::cj (0B, 6); [return slot optimization]
Comment 3 Jakub Jelinek 2020-01-14 18:15:52 UTC
Can't reproduce on x86_64-linux:
$ rm pr93258.C.*; /opt/notnfs/gcc-bisect/obj/gcc/cc1plus.272123 -quiet -w -O3 pr93258.C -fdump-tree-fre3; cat pr93258.C.* | tail -n 9
  <bb 2> [local count: 1073741824]:
  p = ck;
  D.2585 = bw::ci::cj (0B, 6); [return slot optimization]
  D.2585 ={v} {CLOBBER};
  __builtin_unreachable ();

}


$ rm pr93258.C.*; /opt/notnfs/gcc-bisect/obj/gcc/cc1plus.272125 -quiet -w -O3 pr93258.C -fdump-tree-fre3; cat pr93258.C.* | tail -n 9
  <bb 2> [local count: 1073741824]:
  p = ck;
  D.2585 = bw::ci::cj (0B, 6); [return slot optimization]
  D.2585 ={v} {CLOBBER};
  __builtin_unreachable ();

}


$ rm pr93258.C.*; /opt/notnfs/gcc-bisect/obj/gcc/cc1plus.280156 -quiet -w -O3 pr93258.C -fdump-tree-fre3; cat pr93258.C.* | tail -n 9
  <bb 2> [local count: 1073741824]:
  p = ck;
  D.2606 = bw::ci::cj (0B, 6); [return slot optimization]
  D.2606 ={v} {CLOBBER};
  __builtin_unreachable ();

}
Comment 4 Richard Biener 2020-01-17 12:09:39 UTC
So you need to come up with another testcase.
Comment 5 Martin Liška 2020-01-17 12:46:35 UTC
Fixed on trunk with g:r10-5888-ge0804c9b5efdf17bbfb692a787df36b86f71af8d.

Bisecting latest revisions
  e0804c9b5efdf17b(10 Jan 2020 13:47)(jason@redhat.com): [took: 4.921s] result: OK

;; Function fn1 (_Z3fn1v, funcdef_no=9, decl_uid=2587, cgraph_uid=8, symbol_order=8) (executed once)

fn1 ()
{
  struct o D.2606;

  <bb 2> [local count: 1073741824]:
  D.2606 = bw::ci::cj (0B, 6); [return slot optimization]
  D.2606 ={v} {CLOBBER};
  __builtin_unreachable ();

}


  640b23d7ff5f3fad(10 Jan 2020 13:46)(jason@redhat.com): [took: 5.350s] result: OK

;; Function fn1 (_Z3fn1v, funcdef_no=9, decl_uid=2587, cgraph_uid=8, symbol_order=8) (executed once)

fn1 ()
{
  const char * p$cd;
  struct o D.2606;
  long int _6;

  <bb 2> [local count: 1073741824]:
  p$cd_5 = ck.cd;
  _6 = p$cd_5 - "";
  D.2606 = bw::ci::cj (0B, _6); [return slot optimization]
  D.2606 ={v} {CLOBBER};
  __builtin_unreachable ();

}