PATCH: Properly generate X32 IE sequence

H.J. Lu hjl.tools@gmail.com
Mon Mar 19 16:47:00 GMT 2012


On Mon, Mar 19, 2012 at 9:37 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Mon, Mar 19, 2012 at 5:34 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>
>>>> Combine failed:
>>>>
>>>> (set (reg:QI 63 [ c ])
>>>>    (mem/c:QI (plus:DI (zero_extend:DI (unspec:SI [
>>>>                        (const_int 0 [0])
>>>>                    ] UNSPEC_TP))
>>>>            (mem/u/c:DI (const:DI (unspec:DI [
>>>>                            (symbol_ref:SI ("c") [flags 0x60]
>>>> <var_decl 0x7ffff19b8140 c>)
>>>>                        ] UNSPEC_GOTNTPOFF)) [2 S8 A8])) [0 c+0 S1 A8]))
>>>>
>>>>
>>>
>>> Wrong testcase.  It should be
>>>
>>> --
>>> extern __thread char c;
>>> extern __thread short w;
>>> extern char y;
>>> extern short i;
>>> void
>>> ie (void)
>>> {
>>>  y = c;
>>>  i = w;
>>> }
>>> ---
>>>
>>> I got
>>>
>>>        movl    %fs:0, %eax
>>>        movq    c@gottpoff(%rip), %rdx
>>>        movzbl  (%rax,%rdx), %edx
>>>        movb    %dl, y(%rip)
>>>        movq    w@gottpoff(%rip), %rdx
>>>        movzwl  (%rax,%rdx), %eax
>>>        movw    %ax, i(%rip)
>>>        ret
>>>
>>> It can be
>>>
>>>        movq    c@gottpoff(%rip), %rax
>>>        movzbl  %fs:(%rax), %eax
>>>        movb    %al, y(%rip)
>>>        movq    w@gottpoff(%rip), %rax
>>>        movzwl  %fs:(%rax), %eax
>>>        movw    %ax, i(%rip)
>>>        ret
>>>
>>>
>>
>> How about this patch?  I changed the x32 TP load to
>>
>> (define_insn "*load_tp_x32_<mode>"
>>  [(set (match_operand:SWI48x 0 "register_operand" "=r")
>>        (unspec:SWI48x [(const_int 0)] UNSPEC_TP))]
>>  "TARGET_X32"
>>  "mov{l}\t{%%fs:0, %k0|%k0, DWORD PTR fs:0}"
>>  [(set_attr "type" "imov")
>>   (set_attr "modrm" "0")
>>   (set_attr "length" "7")
>>   (set_attr "memory" "load")
>>   (set_attr "imm_disp" "false")])
>>
>> and removed *load_tp_x32_zext.
>
> No, your whole approach with splitters is wrong.
>
> @@ -12747,11 +12747,11 @@
>  (define_mode_attr tp_seg [(SI "gs") (DI "fs")])
>
>  ;; Load and add the thread base pointer from %<tp_seg>:0.
> -(define_insn "*load_tp_x32"
> -  [(set (match_operand:SI 0 "register_operand" "=r")
> -       (unspec:SI [(const_int 0)] UNSPEC_TP))]
> +(define_insn "*load_tp_x32_<mode>"
> +  [(set (match_operand:SWI48x 0 "register_operand" "=r")
> +       (unspec:SWI48x [(const_int 0)] UNSPEC_TP))]
>   "TARGET_X32"
> -  "mov{l}\t{%%fs:0, %0|%0, DWORD PTR fs:0}"
> +  "mov{l}\t{%%fs:0, %k0|%k0, DWORD PTR fs:0}"
>
> The result is a zero-extended SImode register, not a fake SImode register in DImode.
>
> But as said, you should generate the correct sequence from the beginning.
>

For x32, the thread pointer is an unsigned 32-bit value.

movl %fs:0, %eax

is the correct instruction to load the thread pointer into EAX and RAX.


-- 
H.J.



More information about the Gcc-patches mailing list