This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH 1/2] gcc_qsort: source code changes
On May 10, 2018 5:56:40 PM GMT+02:00, Alexander Monakov <amonakov@ispras.ru> wrote:
> * sort.cc: New file.
> * system.h [!CHECKING_P] (qsort): Redirect to gcc_qsort.
> * vec.c (qsort_chk): Use gcc_qsort.
Just a quick first remark - how about putting this into libiberty? And then name it xqsort?
Richard.
>---
> gcc/sort.cc | 232 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> gcc/system.h | 7 +-
> gcc/vec.c | 2 +-
> 3 files changed, 238 insertions(+), 3 deletions(-)
> create mode 100644 gcc/sort.cc
>
>diff --git a/gcc/sort.cc b/gcc/sort.cc
>new file mode 100644
>index 00000000000..4faf6d45dc6
>--- /dev/null
>+++ b/gcc/sort.cc
>@@ -0,0 +1,232 @@
>+/* Platform-independent deterministic sort function.
>+ Copyright (C) 2018 Free Software Foundation, Inc.
>+ Contributed by Alexander Monakov.
>+
>+This file is part of GCC.
>+
>+GCC is free software; you can redistribute it and/or modify it
>+under the terms of the GNU General Public License as published by the
>+Free Software Foundation; either version 3, or (at your option) any
>+later version.
>+
>+GCC is distributed in the hope that it will be useful, but WITHOUT
>+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
>+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
>+for more details.
>+
>+You should have received a copy of the GNU General Public License
>+along with GCC; see the file COPYING3. If not see
>+<http://www.gnu.org/licenses/>. */
>+
>+/* This implements a sort function suitable for GCC use cases:
>+ - signature-compatible to C qsort, but relaxed contract:
>+ - may apply the comparator to elements in a temporary buffer
>+ - may abort on allocation failure
>+ - deterministic (but not necessarily stable)
>+ - fast, especially for common cases (0-5 elements of size 8 or 4)
>+
>+ The implementation uses a network sort for up to 5 elements and
>+ a merge sort on top of that. Neither stage has branches depending on
>+ comparator result, trading extra arithmetic for branch mispredictions. */
>+
>+#ifdef GENERATOR_FILE
>+#include "bconfig.h"
>+#else
>+#include "config.h"
>+#endif
>+
>+#include "system.h"
>+
>+/* Branch hint: tell the compiler that COND is expected to evaluate to 1.
>+ NOTE(review): this uses __builtin_expect unconditionally, unlike noinline
>+ below, which is guarded by __GNUC__ -- confirm that every host compiler
>+ used to build this file provides the builtin. */
>+#define likely(cond) __builtin_expect ((cond), 1)
>+
>+/* Expands to the GNU noinline attribute under __GNUC__, to nothing otherwise. */
>+#ifdef __GNUC__
>+#define noinline __attribute__ ((__noinline__))
>+#else
>+#define noinline
>+#endif
>+
>+/* C-style qsort comparator function type: takes pointers to two elements
>+ and returns an int. The sort code in this file only inspects whether the
>+ result is negative (via `< 0' or `>> 31'). */
>+typedef int cmp_fn (const void *, const void *);
>+
>+/* Structure holding read-mostly (read-only in netsort) context.
>+ All fields are initialized in gcc_qsort; OUT and N are overwritten by
>+ mergesort right before each netsort call. */
>+struct sort_ctx
>+{
>+ cmp_fn *cmp; // pointer to comparator
>+ char *out; // output buffer the reorder helpers store into
>+ size_t n; // number of elements handled by the current netsort call
>+ size_t size; // element size in bytes
>+};
>+
>+/* Helper for netsort. Permute, possibly in-place, 2 or 3 elements,
>+ placing E0 to C->OUT, E1 to C->OUT + C->SIZE, and so on.
>+ E2 is only read when C->N == 3.
>+
>+ REORDER_23 moves SIZE bytes found at byte OFFSET of each element, with
>+ consecutive output elements STRIDE bytes apart. E0 and E1 are loaded
>+ into temporaries before anything is stored, so the inputs may alias the
>+ output slots; E2 needs no temporary because both other inputs have
>+ already been saved by the time it is stored.
>+ NOTE(review): the temporaries are size_t objects, so SIZE must not
>+ exceed sizeof (size_t); the SIZE == 4 case presumably assumes size_t is
>+ at least 4 bytes wide -- confirm. */
>+static void
>+reorder23 (sort_ctx *c, char *e0, char *e1, char *e2)
>+{
>+#define REORDER_23(SIZE, STRIDE, OFFSET) \
>+do { \
>+ size_t t0, t1; \
>+ memcpy (&t0, e0 + OFFSET, SIZE); \
>+ memcpy (&t1, e1 + OFFSET, SIZE); \
>+ char *out = c->out + OFFSET; \
>+ if (likely (c->n == 3)) \
>+ memcpy (out + 2*STRIDE, e2 + OFFSET, SIZE); \
>+ memcpy (out, &t0, SIZE); out += STRIDE; \
>+ memcpy (out, &t1, SIZE); \
>+} while (0)
>+
>+ /* Fast paths with constant sizes: 8-byte elements (only when size_t is
>+ 8 bytes wide) and 4-byte elements. */
>+ if (sizeof (size_t) == 8 && likely (c->size == 8))
>+ REORDER_23 (8, 8, 0);
>+ else if (likely (c->size == 4))
>+ REORDER_23 (4, 4, 0);
>+ else
>+ {
>+ /* Generic element size: move size_t-sized chunks first, then the
>+ remaining tail one byte at a time. */
>+ size_t offset = 0, step = sizeof (size_t);
>+ for (; offset + step <= c->size; offset += step)
>+ REORDER_23 (step, c->size, offset);
>+ for (; offset < c->size; offset++)
>+ REORDER_23 (1, c->size, offset);
>+ }
>+}
>+
>+/* Like reorder23, but permute 4 or 5 elements: E0..E3 are placed at
>+ C->OUT, C->OUT + C->SIZE, ..., and E4 (only read when C->N == 5) goes to
>+ the fifth slot.
>+
>+ REORDER_45 moves SIZE bytes found at byte OFFSET of each element, with
>+ consecutive output elements STRIDE bytes apart. E0..E3 are loaded into
>+ temporaries before anything is stored, so the inputs may alias the
>+ output slots; E4 is stored directly once the other four are saved.
>+ NOTE(review): the temporaries are size_t objects, so SIZE must not
>+ exceed sizeof (size_t); the SIZE == 4 case presumably assumes size_t is
>+ at least 4 bytes wide -- confirm. */
>+static void
>+reorder45 (sort_ctx *c, char *e0, char *e1, char *e2, char *e3, char
>*e4)
>+{
>+#define REORDER_45(SIZE, STRIDE, OFFSET) \
>+do { \
>+ size_t t0, t1, t2, t3; \
>+ memcpy (&t0, e0 + OFFSET, SIZE); \
>+ memcpy (&t1, e1 + OFFSET, SIZE); \
>+ memcpy (&t2, e2 + OFFSET, SIZE); \
>+ memcpy (&t3, e3 + OFFSET, SIZE); \
>+ char *out = c->out + OFFSET; \
>+ if (likely (c->n == 5)) \
>+ memcpy (out + 4*STRIDE, e4 + OFFSET, SIZE); \
>+ memcpy (out, &t0, SIZE); out += STRIDE; \
>+ memcpy (out, &t1, SIZE); out += STRIDE; \
>+ memcpy (out, &t2, SIZE); out += STRIDE; \
>+ memcpy (out, &t3, SIZE); \
>+} while (0)
>+
>+ /* Fast paths with constant sizes: 8-byte elements (only when size_t is
>+ 8 bytes wide) and 4-byte elements. */
>+ if (sizeof (size_t) == 8 && likely (c->size == 8))
>+ REORDER_45 (8, 8, 0);
>+ else if (likely(c->size == 4))
>+ REORDER_45 (4, 4, 0);
>+ else
>+ {
>+ /* Generic element size: move size_t-sized chunks first, then the
>+ remaining tail one byte at a time. */
>+ size_t offset = 0, step = sizeof (size_t);
>+ for (; offset + step <= c->size; offset += step)
>+ REORDER_45 (step, c->size, offset);
>+ for (; offset < c->size; offset++)
>+ REORDER_45 (1, c->size, offset);
>+ }
>+}
>+
>+/* Helper for netsort. Invoke comparator CMP on E0 and E1.
>+ Return E0^E1 (the XOR of the two addresses) if E0 compares less than E1,
>+ zero otherwise. XOR-ing either pointer with a non-zero result yields
>+ the other pointer, which is how the caller swaps them without a branch.
>+ This is noinline to avoid code growth and confine invocation
>+ to a single call site, assisting indirect branch prediction.
>+ NOTE(review): the mask is built as `cmp (...) >> 31', which is all-ones
>+ for every negative result and zero otherwise only if int is 32 bits
>+ wide and right-shifting a negative int is arithmetic -- confirm for all
>+ supported hosts. */
>+noinline static intptr_t
>+cmp1 (char *e0, char *e1, cmp_fn *cmp)
>+{
>+ intptr_t x = (intptr_t)e0 ^ (intptr_t)e1;
>+ return x & (cmp (e0, e1) >> 31);
>+}
>+
>+/* Execute network sort on 2 to 5 elements from IN, placing them into C->OUT.
>+ IN may be equal to C->OUT, in which case elements are sorted in place.
>+ The element count is taken from C->N. The compare-exchange steps only
>+ swap the local pointers E0..E4; the element bytes are moved once at the
>+ end by reorder23 or reorder45. */
>+static void
>+netsort (char *in, sort_ctx *c)
>+{
>+ /* CMP (A, B): compare-exchange. If the element at B compares less than
>+ the element at A, swap the two pointers by XOR-ing both with A^B;
>+ otherwise cmp1 returns zero and both stay unchanged. */
>+#define CMP(e0, e1) \
>+do { \
>+ intptr_t x = cmp1 (e1, e0, c->cmp); \
>+ e0 = (char *)((intptr_t)e0 ^ x); \
>+ e1 = (char *)((intptr_t)e1 ^ x); \
>+} while (0)
>+
>+ char *e0 = in, *e1 = e0 + c->size, *e2 = e1 + c->size;
>+ CMP (e0, e1);
>+ if (likely (c->n == 3))
>+ {
>+ CMP (e1, e2);
>+ CMP (e0, e1);
>+ }
>+ /* 2 or 3 elements: the pointers are ordered, emit the result. */
>+ if (c->n <= 3)
>+ return reorder23 (c, e0, e1, e2);
>+ /* 4 or 5 elements: the steps involving E4 run only when C->N == 5. */
>+ char *e3 = e2 + c->size, *e4 = e3 + c->size;
>+ if (likely (c->n == 5))
>+ {
>+ CMP (e3, e4);
>+ CMP (e2, e4);
>+ }
>+ CMP (e2, e3);
>+ if (likely (c->n == 5))
>+ {
>+ CMP (e0, e3);
>+ CMP (e1, e4);
>+ }
>+ CMP (e0, e2);
>+ CMP (e1, e3);
>+ CMP (e1, e2);
>+ reorder45 (c, e0, e1, e2, e3, e4);
>+}
>+
>+/* Execute merge sort on N elements from IN, placing them into OUT,
>+ using TMP as temporary storage if IN is equal to OUT.
>+ This is a stable sort if netsort is used only for 2 or 3 elements.
>+ Inputs of at most 5 elements are handed to netsort after storing OUT
>+ and N into C. */
>+static void
>+mergesort (char *in, sort_ctx *c, size_t n, char *out, char *tmp)
>+{
>+ if (likely (n <= 5))
>+ {
>+ c->out = out;
>+ c->n = n;
>+ return netsort (in, c);
>+ }
>+ /* NL and NR are the element counts of the left and right halves, SZ is
>+ the left half's size in bytes. L is where the sorted left half will
>+ live: TMP when sorting in place, otherwise IN itself. */
>+ size_t nl = n / 2, nr = n - nl, sz = nl * c->size;
>+ char *mid = in + sz, *r = out + sz, *l = in == out ? tmp : in;
>+ /* Sort the right half, outputting to right half of OUT. */
>+ mergesort (mid, c, nr, r, tmp);
>+ /* Sort the left half, leaving left half of OUT free. */
>+ mergesort (in, c, nl, l, mid);
>+ /* Merge sorted halves given by L, R to [OUT, END). */
>+ /* One iteration: MR is the sign mask of cmp (R, L), so LR ends up equal
>+ to R when the right element compares less and to L otherwise. The
>+ selected element is copied to OUT and only its source pointer
>+ advances by SIZE. The macro returns from mergesort when OUT catches
>+ up with R (left half used up, the rest of the right half is already
>+ in place); the loop stops when R reaches END (right half used up).
>+ END must be in scope at the point of expansion. */
>+#define MERGE_ELTSIZE(SIZE) \
>+do { \
>+ intptr_t mr = c->cmp (r, l) >> 31; \
>+ intptr_t lr = (intptr_t)l ^ (intptr_t)r; \
>+ lr = (intptr_t)l ^ (lr & mr); \
>+ out = (char *)memcpy (out, (char *)lr, SIZE); \
>+ out += SIZE; \
>+ r += mr & SIZE; \
>+ if (r == out) return; \
>+ l += ~mr & SIZE; \
>+} while (r != end)
>+
>+ /* Compare the first element of the right half with the last element of
>+ the left half (R - OUT equals SZ here). The merge loop is needed only
>+ if it compares less; otherwise the halves are already in order. */
>+ if (likely (c->cmp(r, l + (r - out) - c->size) < 0))
>+ {
>+ char *end = out + n * c->size;
>+ if (sizeof (size_t) == 8 && likely (c->size == 8))
>+ MERGE_ELTSIZE (8);
>+ else if (likely (c->size == 4))
>+ MERGE_ELTSIZE (4);
>+ else
>+ MERGE_ELTSIZE (c->size);
>+ }
>+ /* Copy what is left of the left half into the gap between OUT and R. */
>+ memcpy (out, l, r - out);
>+}
>+
>+/* Sort the N elements of SIZE bytes each starting at VBASE, ordering them
>+ with comparator CMP. The signature matches C qsort. Arrays of fewer
>+ than 2 elements are returned untouched. Temporary storage for N / 2
>+ elements comes from an on-stack buffer when it is large enough and
>+ from xmalloc otherwise.
>+ NOTE(review): (n / 2) * size is not checked for overflow -- confirm
>+ that callers cannot pass sizes large enough for this to matter. */
>+void
>+gcc_qsort (void *vbase, size_t n, size_t size, cmp_fn *cmp)
>+{
>+ if (n < 2)
>+ return;
>+ char *base = (char *)vbase;
>+ /* Initial context; mergesort updates the OUT and N fields as it goes. */
>+ sort_ctx c = {cmp, base, n, size};
>+ long long scratch[32];
>+ size_t bufsz = (n / 2) * size;
>+ void *buf = bufsz <= sizeof scratch ? scratch : xmalloc (bufsz);
>+ /* Sort in place: input and output are both BASE, BUF is the scratch. */
>+ mergesort (base, &c, n, base, (char *)buf);
>+ /* Release the scratch buffer only if it came from xmalloc. */
>+ if (buf != scratch)
>+ free (buf);
>+}
>diff --git a/gcc/system.h b/gcc/system.h
>index 4abc321c71d..88dffccb8ab 100644
>--- a/gcc/system.h
>+++ b/gcc/system.h
>@@ -1202,11 +1202,14 @@ helper_const_non_const_cast (const char *p)
> /* qsort comparator consistency checking: except in release-checking compilers,
>    redirect 4-argument qsort calls to qsort_chk; keep 1-argument invocations
>    corresponding to vec::qsort (cmp): they use C qsort internally anyway.  */
>-#if CHECKING_P
>+void qsort_chk (void *, size_t, size_t, int (*)(const void *, const
>void *));
>+void gcc_qsort (void *, size_t, size_t, int (*)(const void *, const
>void *));
> #define PP_5th(a1, a2, a3, a4, a5, ...) a5
> #undef qsort
>+#if CHECKING_P
>#define qsort(...) PP_5th (__VA_ARGS__, qsort_chk, 3, 2, qsort, 0)
>(__VA_ARGS__)
>-void qsort_chk (void *, size_t, size_t, int (*)(const void *, const
>void *));
>+#else
>+#define qsort(...) PP_5th (__VA_ARGS__, gcc_qsort, 3, 2, qsort, 0)
>(__VA_ARGS__)
> #endif
>
> #endif /* ! GCC_SYSTEM_H */
>diff --git a/gcc/vec.c b/gcc/vec.c
>index 11924a80a2d..2941715a34a 100644
>--- a/gcc/vec.c
>+++ b/gcc/vec.c
>@@ -215,7 +215,7 @@ void
> qsort_chk (void *base, size_t n, size_t size,
> int (*cmp)(const void *, const void *))
> {
>- (qsort) (base, n, size, cmp);
>+ gcc_qsort (base, n, size, cmp);
> #if 0
> #define LIM(n) (n)
> #else