Paper Mario DX
Paper Mario (N64) modding
 
Loading...
Searching...
No Matches
qsort.h
Go to the documentation of this file.
1/*
2 * Copyright (c) 2013, 2017 Alexey Tourbin
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23/*
24 * This is a traditional Quicksort implementation which mostly follows
25 * [Sedgewick 1978]. Sorting is performed entirely on array indices,
26 * while actual access to the array elements is abstracted out with the
27 * user-defined `LESS` and `SWAP` primitives.
28 *
29 * Synopsis:
30 * QSORT(N, LESS, SWAP);
31 * where
32 * N - the number of elements in A[];
33 * LESS(i, j) - compares A[i] to A[j];
34 * SWAP(i, j) - exchanges A[i] with A[j].
35 */
36
37#ifndef QSORT_H
38#define QSORT_H
39
40/* Sort 3 elements. */
41#define Q_SORT3(q_a1, q_a2, q_a3, Q_LESS, Q_SWAP) \
42do { \
43 if (Q_LESS(q_a2, q_a1)) { \
44 if (Q_LESS(q_a3, q_a2)) \
45 Q_SWAP(q_a1, q_a3); \
46 else { \
47 Q_SWAP(q_a1, q_a2); \
48 if (Q_LESS(q_a3, q_a2)) \
49 Q_SWAP(q_a2, q_a3); \
50 } \
51 } \
52 else if (Q_LESS(q_a3, q_a2)) { \
53 Q_SWAP(q_a2, q_a3); \
54 if (Q_LESS(q_a2, q_a1)) \
55 Q_SWAP(q_a1, q_a2); \
56 } \
57} while (0)
58
59/* Partition [q_l,q_r] around a pivot. After partitioning,
60 * [q_l,q_j] are the elements that are less than or equal to the pivot,
61 * while [q_i,q_r] are the elements greater than or equal to the pivot. */
62#define Q_PARTITION(q_l, q_r, q_i, q_j, Q_UINT, Q_LESS, Q_SWAP) \
63do { \
64 /* The middle element, not to be confused with the median. */ \
65 Q_UINT q_m = q_l + ((q_r - q_l) >> 1); \
66 /* Reorder the second, the middle, and the last items. \
67 * As [Edelkamp Weiss 2016] explain, using the second element \
68 * instead of the first one helps avoid bad behaviour for \
69 * decreasingly sorted arrays. This method is used in recent \
70 * versions of gcc's std::sort, see gcc bug 58437#c13, although \
71 * the details are somewhat different (cf. #c14). */ \
72 Q_SORT3(q_l + 1, q_m, q_r, Q_LESS, Q_SWAP); \
73 /* Place the median at the beginning. */ \
74 Q_SWAP(q_l, q_m); \
75 /* Partition [q_l+2, q_r-1] around the median which is in q_l. \
76 * q_i and q_j are initially off by one, they get decremented \
77 * in the do-while loops. */ \
78 q_i = q_l + 1; q_j = q_r; \
79 while (1) { \
80 do q_i++; while (Q_LESS(q_i, q_l)); \
81 do q_j--; while (Q_LESS(q_l, q_j)); \
82 if (q_i >= q_j) break; /* Sedgewick says "until j < i" */ \
83 Q_SWAP(q_i, q_j); \
84 } \
85 /* Compensate for the i==j case. */ \
86 q_i = q_j + 1; \
87 /* Put the median to its final place. */ \
88 Q_SWAP(q_l, q_j); \
89 /* The median is not part of the left subfile. */ \
90 q_j--; \
91} while (0)
92
93/* Insertion sort is applied to small subfiles - this is contrary to
94 * Sedgewick's suggestion to run a separate insertion sort pass after
95 * the partitioning is done. The reason I don't like a separate pass
96 * is that it triggers extra comparisons, because it can't see that the
97 * medians are already in their final positions and need not be rechecked.
98 * Since I do not assume that comparisons are cheap, I also do not try
99 * to eliminate the (q_j > q_l) boundary check. */
100#define Q_INSERTION_SORT(q_l, q_r, Q_UINT, Q_LESS, Q_SWAP) \
101do { \
102 Q_UINT q_i, q_j; \
103 /* For each item starting with the second... */ \
104 for (q_i = q_l + 1; q_i <= q_r; q_i++) \
105 /* move it down the array so that the first part is sorted. */ \
106 for (q_j = q_i; q_j > q_l && (Q_LESS(q_j, q_j - 1)); q_j--) \
107 Q_SWAP(q_j, q_j - 1); \
108} while (0)
109
110/* When the size of [q_l,q_r], i.e. q_r-q_l+1, is greater than or equal to
111 * Q_THRESH, the algorithm performs recursive partitioning. When the size
112 * drops below Q_THRESH, the algorithm switches to insertion sort.
113 * The minimum valid value is probably 5 (with 5 items, the second and
114 * the middle items, the middle itself being rounded down, are distinct). */
115#define Q_THRESH 16
116
117/* The main loop. */
118#define Q_LOOP(Q_UINT, Q_N, Q_LESS, Q_SWAP) \
119do { \
120 Q_UINT q_l = 0; \
121 Q_UINT q_r = (Q_N) - 1; \
122 Q_UINT q_sp = 0; /* the number of frames pushed to the stack */ \
123 struct { Q_UINT q_l, q_r; } \
124 /* On 32-bit platforms, to sort a "char[3GB+]" array, \
125 * it may take full 32 stack frames. On 64-bit CPUs, \
126 * though, the address space is limited to 48 bits. \
127 * The usage is further reduced if Q_N has a 32-bit type. */ \
128 q_st[sizeof(Q_UINT) > 4 && sizeof(Q_N) > 4 ? 48 : 32]; \
129 while (1) { \
130 if (q_r - q_l + 1 >= Q_THRESH) { \
131 Q_UINT q_i, q_j; \
132 Q_PARTITION(q_l, q_r, q_i, q_j, Q_UINT, Q_LESS, Q_SWAP); \
133 /* Now have two subfiles: [q_l,q_j] and [q_i,q_r]. \
134 * Dealing with them depends on which one is bigger. */ \
135 if (q_j - q_l >= q_r - q_i) \
136 Q_SUBFILES(q_l, q_j, q_i, q_r); \
137 else \
138 Q_SUBFILES(q_i, q_r, q_l, q_j); \
139 } \
140 else { \
141 Q_INSERTION_SORT(q_l, q_r, Q_UINT, Q_LESS, Q_SWAP); \
142 /* Pop subfiles from the stack, until it gets empty. */ \
143 if (q_sp == 0) break; \
144 q_sp--; \
145 q_l = q_st[q_sp].q_l; \
146 q_r = q_st[q_sp].q_r; \
147 } \
148 } \
149} while (0)
150
151/* The missing part: dealing with subfiles.
152 * Assumes that the first subfile is not smaller than the second. */
153#define Q_SUBFILES(q_l1, q_r1, q_l2, q_r2) \
154do { \
155 /* If the second subfile is only a single element, it needs \
156 * no further processing. The first subfile will be processed \
157 * on the next iteration (both subfiles cannot be only a single \
158 * element, due to Q_THRESH). */ \
159 if (q_l2 == q_r2) { \
160 q_l = q_l1; \
161 q_r = q_r1; \
162 } \
163 else { \
164 /* Otherwise, both subfiles need processing. \
165 * Push the larger subfile onto the stack. */ \
166 q_st[q_sp].q_l = q_l1; \
167 q_st[q_sp].q_r = q_r1; \
168 q_sp++; \
169 /* Process the smaller subfile on the next iteration. */ \
170 q_l = q_l2; \
171 q_r = q_r2; \
172 } \
173} while (0)
174
175/* And now, ladies and gentlemen, may I proudly present to you... */
176#define QSORT(Q_N, Q_LESS, Q_SWAP) \
177do { \
178 if ((Q_N) > 1) \
179 /* We could check sizeof(Q_N) and use "unsigned", but at least \
180 * on x86_64, this has the performance penalty of up to 5%. */ \
181 Q_LOOP(unsigned long, Q_N, Q_LESS, Q_SWAP); \
182} while (0)
183
184#endif
185
186/* ex:set ts=8 sts=4 sw=4 noet: */