This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

[Bug c/69760] New: Wrong 64-bit memory address caused by an unneeded overflowing 32-bit integer multiplication on x86_64 under -O2 and -O3 code optimization

From: "dimitri_laikov at mail dot ru" <gcc-bugzilla at gcc dot gnu dot org>
To: gcc-bugs at gcc dot gnu dot org
Date: Wed, 10 Feb 2016 22:20:08 +0000
Subject: [Bug c/69760] New: Wrong 64-bit memory address caused by an unneeded overflowing 32-bit integer multiplication on x86_64 under -O2 and -O3 code optimization
Auto-submitted: auto-generated

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69760

            Bug ID: 69760
           Summary: Wrong 64-bit memory address caused by an unneeded
                    overflowing 32-bit integer multiplication on x86_64
                    under -O2 and -O3 code optimization
           Product: gcc
           Version: 5.3.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c
          Assignee: unassigned at gcc dot gnu.org
          Reporter: dimitri_laikov at mail dot ru
  Target Milestone: ---

This simplest code "test_func.c":

$ cat test_func.c

void test_func (double *a, int L, int m, int n, int N)
{
  int    i, k;
  for (i = 0; i < N; i++) {
    k = i - m;
    if ((k >= 0) && (k < n)) {
      a[L*k] = 0.0;
    }
  }
  return;
}

when compiled on x86_64 by GCC versions from at least 4.8.1 up to 5.3.0
is working right with -O0 or -O1 optimizations,
but fails with -O2 or -O3, as can be seen by calling it
from this main function "test_main.c":

$ cat test_main.c
#include <stdlib.h>

void test_func (double *a, int L, int m, int n, int N);

int main ()
{
  double *a;
  int    L, m, n, N;
  L = 10000000;
  n = 4;
  N = 100*n;
  a = (double *) malloc (sizeof (double)*L*n);
  for (m = 0; m < N; m += n) {
    test_func (a, L, m, n, N);
  }
  free (a);
  return 0;
}

$ for n in 0 1 2 3 ; do gcc-5.3.0 -S -O$n test_func.c -o test_func.O$n.s ; wc
test_func.O$n.s ; gcc -O0 test_func.O$n.s test_main.c -o test.x ; ./test.x ;
done
 51 119 881 test_func.O0.s
 35  79 551 test_func.O1.s
 45  94 696 test_func.O2.s
Segmentation fault
 45  94 696 test_func.O3.s
Segmentation fault


An even more _amazing_ thing is seen when printf is added:

$ cat test_func_print.c 
#include <stdio.h>

void test_func (double *a, int L, int m, int n, int N)
{
  int    i, k;
  for (i = 0; i < N; i++) {
    k = i - m;
    if ((k >= 0) && (k < n)) {
      printf ("k=%d a=%p a+L*k=%p\n", k, a, a + L*k);
      a[L*k] = 0.0;
    }
  }
  return;
}

$ gcc-5.3.0 -O2 test_func_print.c test_main.c -o test.x ; ./test.x
k=0 a=0x7f66ddc42010 a+L*k=0x7f66ddc42010
...
k=0 a=0x7f66ddc42010 a+L*k=0x7f6eddc42010
Segmentation fault

the corrupted pointer being 0x800000000 higher than it should!

(Should anyone say that this innocent C code is wrong?)

It can be understood by looking into the assembly file
(with comments added by us):

$ cat test_func.O2.S 
        .file   "test_func.c"
        .section        .text.unlikely,"ax",@progbits
.LCOLDB1:
        .text
.LHOTB1:
        .p2align 4,,15
        .globl  test_func
        .type   test_func, @function
test_func:
.LFB0:
        .cfi_startproc
        testl   %r8d, %r8d
        jle     .L1             # if (N <= 0) return
        movl    %edx, %eax      # %eax = m
        movslq  %esi, %r9       # %r9 = (long) L
        subl    %edx, %r8d      # %r8d = N - m
        negl    %eax            # %eax = k = -m
        salq    $3, %r9         # %r9 = 8*((long) L)
        imull   %eax, %esi      # %esi = L*(-m)
        movslq  %esi, %rsi      # %rsi = (long) (L*(-m))
        leaq    (%rdi,%rsi,8), %rsi     # %rsi = ak = a + L*(-m)
        .p2align 4,,10
        .p2align 3
.L4:
        testl   %eax, %eax
        js      .L3
        cmpl    %eax, %ecx
        jle     .L3
        movq    $0, (%rsi)      # if ((k >= 0) && (k < n)) *ak = 0.0
.L3:
        addl    $1, %eax        # %eax = k++
        addq    %r9, %rsi       # %rsi = ak += L
        cmpl    %r8d, %eax
        jne     .L4             # if (k != N - m) goto L4
.L1:
        rep ret
        .cfi_endproc
.LFE0:
        .size   test_func, .-test_func
        .section        .text.unlikely
.LCOLDE1:
        .text
.LHOTE1:
        .ident  "GCC: (GNU) 5.3.0"
        .section        .note.GNU-stack,"",@progbits

The problem is that the optimizer rewrites the code as if it were:

void test_func (double *a, int L, int m, int n, int N)
{
  double *ak;
  int    k;
  if (N <= 0) {
    return;
  }
  ak = a + L*(-m);
  for (k = -m; k < N - m; k++) {
    if ((k >= 0) && (k < n)) {
      *ak = 0.0;
    }
    ak += L;
  }
  return;
}

and because "L*(-m)" is computed in 32-bit precision,
in which it does not fit, the pointer "double *ak"
fails to reference "a[0]" after being m times incremented by L.

The problem would be solved by teaching the compiler
to use more bits when precomputing pointers for the optimized loops,
in this example:
...
ak = a + ((long) L)*(-m)
...

or something like this

test_func:
.LFB0:
        .cfi_startproc
        testl   %r8d, %r8d
        jle     .L1             # if (N <= 0) return
        movslq  %edx, %rax      #|%rax = (long) m
        movslq  %esi, %r9       # %r9 = (long) L
        subl    %edx, %r8d      # %r8d = N - m
        negq    %rax            #|%rax = k = -m
        salq    $3, %r9         # %r9 = 8*((long) L)
        movslq  %esi, %rsi      #|%rsi = (long) L
        imulq   %rax, %rsi      # %rsi = ((long) L)*(-m)
#       movslq  %esi, %rsi      #-
        leaq    (%rdi,%rsi,8), %rsi     # %rsi = ak = a + ((long) L)*(-m)
        .p2align 4,,10
        .p2align 3
.L4:
        testl   %eax, %eax
        js      .L3
        cmpl    %eax, %ecx
        jle     .L3
        movq    $0, (%rsi)      # if ((k >= 0) && (k < n)) *ak = 0.0
.L3:
        addl    $1, %eax        # %eax = k++
        addq    %r9, %rsi       # %rsi = ak += L
        cmpl    %r8d, %eax
        jne     .L4             # if (k != N - m) goto L4
.L1:
        rep ret

Many thanks for your kind attention.

Follow-Ups:
- [Bug c/69760] Wrong 64-bit memory address caused by an unneeded overflowing 32-bit integer multiplication on x86_64 under -O2 and -O3 code optimization
  - From: pinskia at gcc dot gnu.org
- [Bug c/69760] Wrong 64-bit memory address caused by an unneeded overflowing 32-bit integer multiplication on x86_64 under -O2 and -O3 code optimization
  - From: jakub at gcc dot gnu.org
- [Bug c/69760] [4.9/5/6 Regression] Wrong 64-bit memory address caused by an unneeded overflowing 32-bit integer multiplication on x86_64 under -O2 and -O3 code optimization
  - From: jakub at gcc dot gnu.org
- [Bug tree-optimization/69760] [4.9/5/6 Regression] Wrong 64-bit memory address caused by an unneeded overflowing 32-bit integer multiplication on x86_64 under -O2 and -O3 code optimization
  - From: rguenth at gcc dot gnu.org
- [Bug tree-optimization/69760] [4.9/5/6 Regression] Wrong 64-bit memory address caused by an unneeded overflowing 32-bit integer multiplication on x86_64 under -O2 and -O3 code optimization
  - From: rguenth at gcc dot gnu.org
- [Bug tree-optimization/69760] [4.9/5/6 Regression] Wrong 64-bit memory address caused by an unneeded overflowing 32-bit integer multiplication on x86_64 under -O2 and -O3 code optimization
  - From: rguenth at gcc dot gnu.org
- [Bug tree-optimization/69760] [4.9/5/6 Regression] Wrong 64-bit memory address caused by an unneeded overflowing 32-bit integer multiplication on x86_64 under -O2 and -O3 code optimization
  - From: jakub at gcc dot gnu.org
- [Bug tree-optimization/69760] [4.9/5/6 Regression] Wrong 64-bit memory address caused by an unneeded overflowing 32-bit integer multiplication on x86_64 under -O2 and -O3 code optimization
  - From: rguenth at gcc dot gnu.org
- [Bug tree-optimization/69760] [4.9/5/6 Regression] Wrong 64-bit memory address caused by an unneeded overflowing 32-bit integer multiplication on x86_64 under -O2 and -O3 code optimization
  - From: amker at gcc dot gnu.org
- [Bug tree-optimization/69760] [4.9/5/6 Regression] Wrong 64-bit memory address caused by an unneeded overflowing 32-bit integer multiplication on x86_64 under -O2 and -O3 code optimization
  - From: rguenth at gcc dot gnu.org
- [Bug tree-optimization/69760] [4.9/5/6 Regression] Wrong 64-bit memory address caused by an unneeded overflowing 32-bit integer multiplication on x86_64 under -O2 and -O3 code optimization
  - From: rguenth at gcc dot gnu.org
- [Bug tree-optimization/69760] [4.9/5 Regression] Wrong 64-bit memory address caused by an unneeded overflowing 32-bit integer multiplication on x86_64 under -O2 and -O3 code optimization
  - From: rguenth at gcc dot gnu.org
- [Bug tree-optimization/69760] [4.9/5 Regression] Wrong 64-bit memory address caused by an unneeded overflowing 32-bit integer multiplication on x86_64 under -O2 and -O3 code optimization
  - From: vogt at linux dot vnet.ibm.com

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]