aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2007-10-05 06:50:35 +0000
committerUlrich Drepper <drepper@redhat.com>2007-10-05 06:50:35 +0000
commit375d942968a95ec6c324d89922ee7a574c40630f (patch)
treefd9e064e4b293e36080fee72ebc1b44f7b9934f6
parent174420d2bc39c611fbc09669490b96fc6cc81520 (diff)
downloadglibc-375d942968a95ec6c324d89922ee7a574c40630f.zip
glibc-375d942968a95ec6c324d89922ee7a574c40630f.tar.gz
glibc-375d942968a95ec6c324d89922ee7a574c40630f.tar.bz2
2007-10-04 Jakub Jelinek
* stdlib/msort.c: Include stdint.h. (struct msort_param): New type. (msort_with_tmp): Use struct msort_param pointer for unchanging parameters. Add optimized handling for several common sizes and indirect sorting mode. (qsort): Adjust msort_with_tmp callers. For big S use indirect sorting. Suggested by Belazougui Djamel . * stdlib/Makefile (tests): Add tst-qsort2. * stdlib/tst-qsort2.c: New test.
-rw-r--r--ChangeLog14
-rw-r--r--stdlib/Makefile2
-rw-r--r--stdlib/msort.c276
-rw-r--r--stdlib/tst-qsort2.c89
4 files changed, 309 insertions, 72 deletions
diff --git a/ChangeLog b/ChangeLog
index eed866f..ee214fc 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+2007-10-04 Jakub Jelinek
+
+ * stdlib/msort.c: Include stdint.h.
+ (struct msort_param): New type.
+ (msort_with_tmp): Use struct msort_param pointer for unchanging
+ parameters. Add optimized handling for several common sizes
+ and indirect sorting mode.
+ (qsort): Adjust msort_with_tmp callers. For big S use indirect
+ sorting.
+ Suggested by Belazougui Djamel .
+
+ * stdlib/Makefile (tests): Add tst-qsort2.
+ * stdlib/tst-qsort2.c: New test.
+
2007-10-04 Ulrich Drepper <drepper@redhat.com>
* login/login_tty.c (login_tty): The Linux kernel can return EBUSY
diff --git a/stdlib/Makefile b/stdlib/Makefile
index 7390647..e72ab5b 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -68,7 +68,7 @@ tests := tst-strtol tst-strtod testmb testrand testsort testdiv \
tst-limits tst-rand48 bug-strtod tst-setcontext \
test-a64l tst-qsort tst-system testmb2 bug-strtod2 \
tst-atof1 tst-atof2 tst-strtod2 tst-strtod3 tst-rand48-2 \
- tst-makecontext tst-strtod4 tst-strtod5
+ tst-makecontext tst-strtod4 tst-strtod5 tst-qsort2
include ../Makeconfig
diff --git a/stdlib/msort.c b/stdlib/msort.c
index e69b401..3961e9e 100644
--- a/stdlib/msort.c
+++ b/stdlib/msort.c
@@ -1,6 +1,6 @@
/* An alternative to qsort, with an identical interface.
This file is part of the GNU C Library.
- Copyright (C) 1992,95-97,99,2000,01,02,04 Free Software Foundation, Inc.
+ Copyright (C) 1992,95-97,99,2000,01,02,04,07 Free Software Foundation, Inc.
Written by Mike Haertel, September 1988.
The GNU C Library is free software; you can redistribute it and/or
@@ -19,20 +19,25 @@
02111-1307 USA. */
#include <alloca.h>
+#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <memcopy.h>
#include <errno.h>
-static void msort_with_tmp (void *b, size_t n, size_t s,
- __compar_fn_t cmp, char *t);
+struct msort_param
+{
+ size_t s;
+ size_t var;
+ __compar_fn_t cmp;
+ char *t;
+};
+static void msort_with_tmp (const struct msort_param *p, void *b, size_t n);
static void
-msort_with_tmp (void *b, size_t n, size_t s, __compar_fn_t cmp,
- char *t)
+msort_with_tmp (const struct msort_param *p, void *b, size_t n)
{
- char *tmp;
char *b1, *b2;
size_t n1, n2;
@@ -42,65 +47,131 @@ msort_with_tmp (void *b, size_t n, size_t s, __compar_fn_t cmp,
n1 = n / 2;
n2 = n - n1;
b1 = b;
- b2 = (char *) b + (n1 * s);
+ b2 = (char *) b + (n1 * p->s);
- msort_with_tmp (b1, n1, s, cmp, t);
- msort_with_tmp (b2, n2, s, cmp, t);
+ msort_with_tmp (p, b1, n1);
+ msort_with_tmp (p, b2, n2);
- tmp = t;
+ char *tmp = p->t;
+ const size_t s = p->s;
+ __compar_fn_t cmp = p->cmp;
+ switch (p->var)
+ {
+ case 0:
+ while (n1 > 0 && n2 > 0)
+ {
+ if ((*cmp) (b1, b2) <= 0)
+ {
+ *(uint32_t *) tmp = *(uint32_t *) b1;
+ b1 += sizeof (uint32_t);
+ --n1;
+ }
+ else
+ {
+ *(uint32_t *) tmp = *(uint32_t *) b2;
+ b2 += sizeof (uint32_t);
+ --n2;
+ }
+ tmp += sizeof (uint32_t);
+ }
+ break;
+ case 1:
+ while (n1 > 0 && n2 > 0)
+ {
+ if ((*cmp) (b1, b2) <= 0)
+ {
+ *(uint64_t *) tmp = *(uint64_t *) b1;
+ b1 += sizeof (uint64_t);
+ --n1;
+ }
+ else
+ {
+ *(uint64_t *) tmp = *(uint64_t *) b2;
+ b2 += sizeof (uint64_t);
+ --n2;
+ }
+ tmp += sizeof (uint64_t);
+ }
+ break;
+ case 2:
+ while (n1 > 0 && n2 > 0)
+ {
+ unsigned long *tmpl = (unsigned long *) tmp;
+ unsigned long *bl;
+
+ tmp += s;
+ if ((*cmp) (b1, b2) <= 0)
+ {
+ bl = (unsigned long *) b1;
+ b1 += s;
+ --n1;
+ }
+ else
+ {
+ bl = (unsigned long *) b2;
+ b2 += s;
+ --n2;
+ }
+ while (tmpl < (unsigned long *) tmp)
+ *tmpl++ = *bl++;
+ }
+ break;
+ case 3:
+ while (n1 > 0 && n2 > 0)
+ {
+ if ((*cmp) (*(const void **) b1, *(const void **) b2) <= 0)
+ {
+ *(void **) tmp = *(void **) b1;
+ b1 += sizeof (void *);
+ --n1;
+ }
+ else
+ {
+ *(void **) tmp = *(void **) b2;
+ b2 += sizeof (void *);
+ --n2;
+ }
+ tmp += sizeof (void *);
+ }
+ break;
+ default:
+ while (n1 > 0 && n2 > 0)
+ {
+ if ((*cmp) (b1, b2) <= 0)
+ {
+ tmp = (char *) __mempcpy (tmp, b1, s);
+ b1 += s;
+ --n1;
+ }
+ else
+ {
+ tmp = (char *) __mempcpy (tmp, b2, s);
+ b2 += s;
+ --n2;
+ }
+ }
+ break;
+ }
- if (s == OPSIZ && (b1 - (char *) 0) % OPSIZ == 0)
- /* We are operating on aligned words. Use direct word stores. */
- while (n1 > 0 && n2 > 0)
- {
- if ((*cmp) (b1, b2) <= 0)
- {
- --n1;
- *((op_t *) tmp) = *((op_t *) b1);
- tmp += sizeof (op_t);
- b1 += sizeof (op_t);
- }
- else
- {
- --n2;
- *((op_t *) tmp) = *((op_t *) b2);
- tmp += sizeof (op_t);
- b2 += sizeof (op_t);
- }
- }
- else
- while (n1 > 0 && n2 > 0)
- {
- if ((*cmp) (b1, b2) <= 0)
- {
- tmp = (char *) __mempcpy (tmp, b1, s);
- b1 += s;
- --n1;
- }
- else
- {
- tmp = (char *) __mempcpy (tmp, b2, s);
- b2 += s;
- --n2;
- }
- }
if (n1 > 0)
memcpy (tmp, b1, n1 * s);
- memcpy (b, t, (n - n2) * s);
+ memcpy (b, p->t, (n - n2) * s);
}
void
qsort (void *b, size_t n, size_t s, __compar_fn_t cmp)
{
- const size_t size = n * s;
+ size_t size = n * s;
+ char *tmp = NULL;
+ struct msort_param p;
- if (size < 1024)
- {
- void *buf = __alloca (size);
+ /* For large object sizes use indirect sorting. */
+ if (s > 32)
+ size = 2 * n * sizeof (void *) + s;
- /* The temporary array is small, so put it on the stack. */
- msort_with_tmp (b, n, s, cmp, buf);
- }
+ if (size < 1024)
+ /* The temporary array is small, so put it on the stack. */
+ p.t = __alloca (size);
else
{
/* We should avoid allocating too much memory since this might
@@ -135,26 +206,89 @@ qsort (void *b, size_t n, size_t s, __compar_fn_t cmp)
/* If the memory requirements are too high don't allocate memory. */
if (size / pagesize > (size_t) phys_pages)
- _quicksort (b, n, s, cmp);
- else
{
- /* It's somewhat large, so malloc it. */
- int save = errno;
- char *tmp = malloc (size);
- if (tmp == NULL)
- {
- /* Couldn't get space, so use the slower algorithm
- that doesn't need a temporary array. */
- __set_errno (save);
- _quicksort (b, n, s, cmp);
- }
- else
- {
- __set_errno (save);
- msort_with_tmp (b, n, s, cmp, tmp);
- free (tmp);
- }
+ _quicksort (b, n, s, cmp);
+ return;
+ }
+
+ /* It's somewhat large, so malloc it. */
+ int save = errno;
+ tmp = malloc (size);
+ __set_errno (save);
+ if (tmp == NULL)
+ {
+ /* Couldn't get space, so use the slower algorithm
+ that doesn't need a temporary array. */
+ _quicksort (b, n, s, cmp);
+ return;
+ }
+ p.t = tmp;
+ }
+
+ p.s = s;
+ p.cmp = cmp;
+ p.var = 4;
+
+ if (s > 32)
+ {
+ /* Indirect sorting. */
+ char *ip = (char *) b;
+ void **tp = (void **) (p.t + n * sizeof (void *));
+ void **t = tp;
+ void *tmp_storage = (void *) (tp + n);
+
+ while ((void *) t < tmp_storage)
+ {
+ *t++ = ip;
+ ip += s;
+ }
+ p.s = sizeof (void *);
+ p.var = 3;
+ msort_with_tmp (&p, p.t + n * sizeof (void *), n);
+
+ /* tp[0] .. tp[n - 1] is now sorted, copy around entries of
+ the original array. Knuth vol. 3 (2nd ed.) exercise 5.2-10. */
+ char *kp;
+ size_t i;
+ for (i = 0, ip = (char *) b; i < n; i++, ip += s)
+ if ((kp = tp[i]) != ip)
+ {
+ size_t j = i;
+ char *jp = ip;
+ memcpy (tmp_storage, ip, s);
+
+ do
+ {
+ size_t k = (kp - (char *) b) / s;
+ tp[j] = jp;
+ memcpy (jp, kp, s);
+ j = k;
+ jp = kp;
+ kp = tp[k];
+ }
+ while (kp != ip);
+
+ tp[j] = jp;
+ memcpy (jp, tmp_storage, s);
+ }
+ }
+ else
+ {
+ if ((s & (sizeof (uint32_t) - 1)) == 0
+ && ((char *) b - (char *) 0) % __alignof__ (uint32_t) == 0)
+ {
+ if (s == sizeof (uint32_t))
+ p.var = 0;
+ else if (s == sizeof (uint64_t)
+ && ((char *) b - (char *) 0) % __alignof__ (uint64_t) == 0)
+ p.var = 1;
+ else if ((s & (sizeof (unsigned long) - 1)) == 0
+ && ((char *) b - (char *) 0)
+ % __alignof__ (unsigned long) == 0)
+ p.var = 2;
}
+ msort_with_tmp (&p, b, n);
}
+ free (tmp);
}
libc_hidden_def (qsort)
diff --git a/stdlib/tst-qsort2.c b/stdlib/tst-qsort2.c
new file mode 100644
index 0000000..75d4a17
--- /dev/null
+++ b/stdlib/tst-qsort2.c
@@ -0,0 +1,89 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+char *array;
+char *array_end;
+size_t member_size;
+
+int
+compare (const void *a1, const void *b1)
+{
+ const char *a = a1;
+ const char *b = b1;
+
+ if (! (array <= a && a < array_end
+ && array <= b && b < array_end))
+ {
+ puts ("compare arguments not inside of the array");
+ exit (EXIT_FAILURE);
+ }
+ int ret = b[0] - a[0];
+ if (ret)
+ return ret;
+ if (member_size > 1)
+ return b[1] - a[1];
+ return 0;
+}
+
+int
+test (size_t nmemb, size_t size)
+{
+ array = malloc (nmemb * size);
+ if (array == NULL)
+ {
+ printf ("%zd x %zd: no memory", nmemb, size);
+ return 1;
+ }
+
+ array_end = array + nmemb * size;
+ member_size = size;
+
+ char *p;
+ size_t i;
+ size_t bias = random ();
+ for (i = 0, p = array; i < nmemb; i++, p += size)
+ {
+ p[0] = (char) (i + bias);
+ if (size > 1)
+ p[1] = (char) ((i + bias) >> 8);
+ }
+
+ qsort (array, nmemb, size, compare);
+
+ for (i = 0, p = array; i < nmemb - 1; i++, p += size)
+ {
+ if (p[0] < p[size]
+ || (size > 1 && p[0] == p[size] && p[1] < p[size + 1]))
+ {
+ printf ("%zd x %zd: failure at offset %zd\n", nmemb,
+ size, i);
+ free (array);
+ return 1;
+ }
+ }
+
+ free (array);
+ return 0;
+}
+
+int
+main (int argc, char **argv)
+{
+ int ret = 0;
+ if (argc >= 2)
+ ret |= test (atoi (argv[1]), atoi (argv[2]));
+ else
+ {
+ ret |= test (10000, 1);
+ ret |= test (200000, 2);
+ ret |= test (2000000, 3);
+ ret |= test (2132310, 4);
+ ret |= test (1202730, 7);
+ ret |= test (1184710, 8);
+ ret |= test (272710, 12);
+ ret |= test (14170, 32);
+ ret |= test (4170, 320);
+ }
+
+ return ret;
+}