Bug 20671

Summary: Poor bit-field code generation
Product: gcc Reporter: Dave Richards <dave>
Component: middle-end Assignee: Not yet assigned to anyone <unassigned>
Status: NEW ---    
Severity: enhancement CC: gcc-bugs
Priority: P2 Keywords: missed-optimization
Version: 4.0.0   
Target Milestone: ---   
Host: Target:
Build: Known to work:
Known to fail: Last reconfirmed: 2005-11-13 16:52:38
Bug Depends on:    
Bug Blocks: 19466    

Description Dave Richards 2005-03-28 19:25:17 UTC
This test simulates the process of clearing the "present" bit
in an x86 page table entry.  The code *should* load the PTE,
clear the present bit, and store the PTE.  On x86, it is
possible to perform these 3 steps in a single instruction, i.e.
AND memory with immediate operand.  The code should also be
preceded by a null check for the access object.

-- Reproducer for poor bit-field code generation: models a page table entry
-- as a record whose components are pinned to explicit bit positions, and
-- declares a procedure that clears one of those bits through an access value.
package Bit_Test is

  -- Modular type with 2 ** 20 distinct values; used for the PFN component.
  type Page_Frame_Number is
    mod 2 ** 20;

  -- Logical view of a page table entry.  The bit position of every
  -- component is fixed by the representation clause below.
  -- NOTE(review): apart from P, the flag names look like the usual x86 PTE
  -- flags (read/write, user, write-through, ...) -- confirm against the
  -- processor manual before relying on that reading.
  type Page_Table_Entry is
    record
      P   : Boolean;  -- the "present" bit; the one Invalidate_PTE clears
      RW  : Boolean;
      U   : Boolean;
      PWT : Boolean;
      PCD : Boolean;
      A   : Boolean;
      D   : Boolean;
      PSE : Boolean;
      G   : Boolean;
      PFN : Page_Frame_Number;
    end record;

  -- Record representation clause: each Boolean occupies exactly one bit
  -- (bits 0 .. 8) and PFN occupies the 20 bits 12 .. 31, all relative to
  -- storage unit 0.  Bits 9 .. 11 are not assigned to any component.
  for Page_Table_Entry use
    record
      P   at 0 range  0 ..  0;
      RW  at 0 range  1 ..  1;
      U   at 0 range  2 ..  2;
      PWT at 0 range  3 ..  3;
      PCD at 0 range  4 ..  4;
      A   at 0 range  5 ..  5;
      D   at 0 range  6 ..  6;
      PSE at 0 range  7 ..  7;
      G   at 0 range  8 ..  8;
      PFN at 0 range 12 .. 31;
    end record;

  -- Access (pointer) type designating a Page_Table_Entry.
  type Page_Table_Entry_Access is
    access Page_Table_Entry;

  -- Sets the P component of the entry designated by PTE to False and
  -- leaves every other component at its current value.
  procedure Invalidate_PTE (
    PTE : in Page_Table_Entry_Access
    );

end Bit_Test;

-- Body of the reproducer.  The statement form below is deliberate: it is the
-- construct whose generated code the bug report is about, so it should not be
-- simplified.
package body Bit_Test is

  -- Clears the P component of the entry designated by PTE.
  --
  -- The update is written as a whole-record aggregate assignment: P is set
  -- to False and each remaining component is copied from the entry's current
  -- value.  The net effect is that only P changes, which the report expects
  -- to compile to a load / clear-bit / store (a single AND-to-memory on x86)
  -- preceded by a null check on PTE.
  procedure Invalidate_PTE (
    PTE : in Page_Table_Entry_Access
    ) is
  begin
    -- PTE.<component> implicitly dereferences the access value, so every
    -- right-hand side below reads from the same entry that is being assigned.
    PTE.all := (
      P   => False,    -- the only component whose value changes
      RW  => PTE.RW,
      U   => PTE.U,
      PWT => PTE.PWT,
      PCD => PTE.PCD,
      A   => PTE.A,
      D   => PTE.D,
      PSE => PTE.PSE,
      G   => PTE.G,
      PFN => PTE.PFN
    );
  end Invalidate_PTE;

end Bit_Test;

Code generated for Invalidate_PTE:

000000a0 <bit_test__invalidate_pte>:
  a0:   55                      push   %ebp
  a1:   89 e5                   mov    %esp,%ebp
  a3:   57                      push   %edi
  a4:   56                      push   %esi
  a5:   53                      push   %ebx
  a6:   83 ec 1c                sub    $0x1c,%esp
  a9:   8b 7d 08                mov    0x8(%ebp),%edi
  ac:   85 ff                   test   %edi,%edi
  ae:   0f 84 e0 00 00 00       je     194 <bit_test__invalidate_pte+0xf4>
  b4:   0f b6 07                movzbl (%edi),%eax
  b7:   88 c2                   mov    %al,%dl
  b9:   88 c1                   mov    %al,%cl
  bb:   d0 ea                   shr    %dl
  bd:   88 c3                   mov    %al,%bl
  bf:   80 e2 01                and    $0x1,%dl
  c2:   88 55 eb                mov    %dl,0xffffffeb(%ebp)
  c5:   88 c2                   mov    %al,%dl
  c7:   c0 ea 04                shr    $0x4,%dl
  ca:   80 e2 01                and    $0x1,%dl
  cd:   88 55 ef                mov    %dl,0xffffffef(%ebp)
  d0:   88 c2                   mov    %al,%dl
  d2:   c0 ea 05                shr    $0x5,%dl
  d5:   80 e2 01                and    $0x1,%dl
  d8:   88 55 f0                mov    %dl,0xfffffff0(%ebp)
  db:   88 c2                   mov    %al,%dl
  dd:   c0 ea 06                shr    $0x6,%dl
  e0:   80 e2 01                and    $0x1,%dl
  e3:   88 55 f1                mov    %dl,0xfffffff1(%ebp)
  e6:   88 c2                   mov    %al,%dl
  e8:   24 fe                   and    $0xfe,%al
  ea:   c0 ea 07                shr    $0x7,%dl
  ed:   88 55 f2                mov    %dl,0xfffffff2(%ebp)
  f0:   c0 e9 02                shr    $0x2,%cl
  f3:   0f b6 57 01             movzbl 0x1(%edi),%edx
  f7:   80 e1 01                and    $0x1,%cl
  fa:   c0 eb 03                shr    $0x3,%bl
  fd:   80 e3 01                and    $0x1,%bl
 100:   80 e2 01                and    $0x1,%dl
 103:   88 55 f3                mov    %dl,0xfffffff3(%ebp)
 106:   8b 37                   mov    (%edi),%esi
 108:   88 07                   mov    %al,(%edi)
 10a:   0f b6 55 eb             movzbl 0xffffffeb(%ebp),%edx
 10e:   8b 07                   mov    (%edi),%eax
 110:   01 d2                   add    %edx,%edx
 112:   83 e0 fd                and    $0xfffffffd,%eax
 115:   09 d0                   or     %edx,%eax
 117:   0f b6 d1                movzbl %cl,%edx
 11a:   89 07                   mov    %eax,(%edi)
 11c:   c1 e2 02                shl    $0x2,%edx
 11f:   83 e0 fb                and    $0xfffffffb,%eax
 122:   09 d0                   or     %edx,%eax
 124:   0f b6 d3                movzbl %bl,%edx
 127:   89 07                   mov    %eax,(%edi)
 129:   c1 e2 03                shl    $0x3,%edx
 12c:   83 e0 f7                and    $0xfffffff7,%eax
 12f:   09 d0                   or     %edx,%eax
 131:   89 07                   mov    %eax,(%edi)
 133:   83 e0 ef                and    $0xffffffef,%eax
 136:   0f b6 55 ef             movzbl 0xffffffef(%ebp),%edx
 13a:   c1 e2 04                shl    $0x4,%edx
 13d:   09 d0                   or     %edx,%eax
 13f:   89 07                   mov    %eax,(%edi)
 141:   83 e0 df                and    $0xffffffdf,%eax
 144:   0f b6 55 f0             movzbl 0xfffffff0(%ebp),%edx
 148:   c1 e2 05                shl    $0x5,%edx
 14b:   09 d0                   or     %edx,%eax
 14d:   89 07                   mov    %eax,(%edi)
 14f:   83 e0 bf                and    $0xffffffbf,%eax
 152:   0f b6 55 f1             movzbl 0xfffffff1(%ebp),%edx
 156:   c1 e2 06                shl    $0x6,%edx
 159:   09 d0                   or     %edx,%eax
 15b:   89 07                   mov    %eax,(%edi)
 15d:   0f b6 55 f2             movzbl 0xfffffff2(%ebp),%edx
 161:   c1 e2 07                shl    $0x7,%edx
 164:   25 7f ff ff ff          and    $0xffffff7f,%eax
 169:   09 d0                   or     %edx,%eax
 16b:   81 e6 00 f0 ff ff       and    $0xfffff000,%esi
 171:   89 07                   mov    %eax,(%edi)
 173:   25 ff fe ff ff          and    $0xfffffeff,%eax
 178:   0f b6 55 f3             movzbl 0xfffffff3(%ebp),%edx
 17c:   c1 e2 08                shl    $0x8,%edx
 17f:   09 d0                   or     %edx,%eax
 181:   89 07                   mov    %eax,(%edi)
 183:   25 ff 0f 00 00          and    $0xfff,%eax
 188:   09 f0                   or     %esi,%eax
 18a:   89 07                   mov    %eax,(%edi)
 18c:   83 c4 1c                add    $0x1c,%esp
 18f:   5b                      pop    %ebx
 190:   5e                      pop    %esi
 191:   5f                      pop    %edi
 192:   5d                      pop    %ebp
 193:   c3                      ret
Comment 1 Andrew Pinski 2005-03-28 22:40:21 UTC
What options did you use to get the x86 asm?
Comment 2 Andrew Pinski 2005-03-28 22:52:30 UTC
Don't worry about it, I can reproduce it on PPC:
        lwz r0,0(r12)
        rlwinm r3,r0,0,1,31
        mr r4,r3
        stw r3,0(r12)
        rlwimi r4,r0,0,1,1
        mr r5,r4
        stw r4,0(r12)


Trying to find an equivalent C testcase. Though it is hard.
Comment 3 Dave Richards 2005-03-28 23:34:56 UTC
gnatmake -O3 bit_test
objdump --disassemble -r bit_test.o
Comment 4 Alexandre Oliva 2007-04-05 23:36:23 UTC
No change with the patch for PR 22156.