File: | out/../deps/v8/src/bigint/mul-toom.cc |
Warning: | line 184, column 3 Value stored to 'R1_sign' is never read |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | // Copyright 2021 the V8 project authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style license that can be |
3 | // found in the LICENSE file. |
4 | |
5 | // Toom-Cook multiplication. |
6 | // Reference: https://en.wikipedia.org/wiki/Toom%E2%80%93Cook_multiplication |
7 | |
8 | #include <algorithm> |
9 | |
10 | #include "src/bigint/bigint-internal.h" |
11 | #include "src/bigint/digit-arithmetic.h" |
12 | #include "src/bigint/vector-arithmetic.h" |
13 | |
14 | namespace v8 { |
15 | namespace bigint { |
16 | |
17 | namespace { |
18 | |
19 | void TimesTwo(RWDigits X) { |
20 | digit_t carry = 0; |
21 | for (int i = 0; i < X.len(); i++) { |
22 | digit_t d = X[i]; |
23 | X[i] = (d << 1) | carry; |
24 | carry = d >> (kDigitBits - 1); |
25 | } |
26 | } |
27 | |
28 | void DivideByTwo(RWDigits X) { |
29 | digit_t carry = 0; |
30 | for (int i = X.len() - 1; i >= 0; i--) { |
31 | digit_t d = X[i]; |
32 | X[i] = (d >> 1) | carry; |
33 | carry = d << (kDigitBits - 1); |
34 | } |
35 | } |
36 | |
37 | void DivideByThree(RWDigits X) { |
38 | digit_t remainder = 0; |
39 | for (int i = X.len() - 1; i >= 0; i--) { |
40 | digit_t d = X[i]; |
41 | digit_t upper = (remainder << kHalfDigitBits) | (d >> kHalfDigitBits); |
42 | digit_t u_result = upper / 3; |
43 | remainder = upper - 3 * u_result; |
44 | digit_t lower = (remainder << kHalfDigitBits) | (d & kHalfDigitMask); |
45 | digit_t l_result = lower / 3; |
46 | remainder = lower - 3 * l_result; |
47 | X[i] = (u_result << kHalfDigitBits) | l_result; |
48 | } |
49 | } |
50 | |
51 | } // namespace |
52 | |
53 | #if DEBUG |
54 | // Set {len_} to 1 rather than 0 so that attempts to access the first digit |
55 | // will crash. |
56 | #define MARK_INVALID(D)(void(0)) D = RWDigits(nullptr, 1) |
57 | #else |
58 | #define MARK_INVALID(D)(void(0)) (void(0)) |
59 | #endif |
60 | |
61 | void ProcessorImpl::Toom3Main(RWDigits Z, Digits X, Digits Y) { |
62 | DCHECK(Z.len() >= X.len() + Y.len())(void(0)); |
63 | // Phase 1: Splitting. |
64 | int i = DIV_CEIL(std::max(X.len(), Y.len()), 3)(((std::max(X.len(), Y.len()))-1) / (3) + 1); |
65 | Digits X0(X, 0, i); |
66 | Digits X1(X, i, i); |
67 | Digits X2(X, 2 * i, i); |
68 | Digits Y0(Y, 0, i); |
69 | Digits Y1(Y, i, i); |
70 | Digits Y2(Y, 2 * i, i); |
71 | |
72 | // Temporary storage. |
73 | int p_len = i + 1; // For all px, qx below. |
74 | int r_len = 2 * p_len; // For all r_x, Rx below. |
75 | Storage temp_storage(4 * r_len); |
76 | // We will use the same variable names as the Wikipedia article, as much as |
77 | // C++ lets us: our "p_m1" is their "p(-1)" etc. For consistency with other |
78 | // algorithms, we use X and Y where Wikipedia uses m and n. |
79 | // We will use and re-use the temporary storage as follows: |
80 | // |
81 | // chunk | -------- time -----------> |
82 | // [0 .. i] |( po )( p_m1 ) ( r_m2 ) |
83 | // [i+1 .. rlen-1] |( qo )( q_m1 ) ( r_m2 ) |
84 | // [rlen .. rlen+i] | (p_1 ) ( p_m2 ) (r_inf) |
85 | // [rlen+i+1 .. 2*rlen-1] | (q_1 ) ( q_m2 ) (r_inf) |
86 | // [2*rlen .. 3*rlen-1] | ( r_1 ) |
87 | // [3*rlen .. 4*rlen-1] | ( r_m1 ) |
88 | // |
89 | // This requires interleaving phases 2 and 3 a bit: after computing |
90 | // r_1 = p_1 * q_1, we can re-use p_1's storage for p_m2, and so on. |
91 | digit_t* t = temp_storage.get(); |
92 | RWDigits po(t, p_len); |
93 | RWDigits qo(t + p_len, p_len); |
94 | RWDigits p_1(t + r_len, p_len); |
95 | RWDigits q_1(t + r_len + p_len, p_len); |
96 | RWDigits r_1(t + 2 * r_len, r_len); |
97 | RWDigits r_m1(t + 3 * r_len, r_len); |
98 | |
99 | // We can also share the backing stores of Z, r_0, R0. |
100 | DCHECK(Z.len() >= r_len)(void(0)); |
101 | RWDigits r_0(Z, 0, r_len); |
102 | |
103 | // Phase 2a: Evaluation, steps 0, 1, m1. |
104 | // po = X0 + X2 |
105 | Add(po, X0, X2); |
106 | // p_0 = X0 |
107 | // p_1 = po + X1 |
108 | Add(p_1, po, X1); |
109 | // p_m1 = po - X1 |
110 | RWDigits p_m1 = po; |
111 | bool p_m1_sign = SubtractSigned(p_m1, po, false, X1, false); |
112 | MARK_INVALID(po)(void(0)); |
113 | |
114 | // qo = Y0 + Y2 |
115 | Add(qo, Y0, Y2); |
116 | // q_0 = Y0 |
117 | // q_1 = qo + Y1 |
118 | Add(q_1, qo, Y1); |
119 | // q_m1 = qo - Y1 |
120 | RWDigits q_m1 = qo; |
121 | bool q_m1_sign = SubtractSigned(q_m1, qo, false, Y1, false); |
122 | MARK_INVALID(qo)(void(0)); |
123 | |
124 | // Phase 3a: Pointwise multiplication, steps 0, 1, m1. |
125 | Multiply(r_0, X0, Y0); |
126 | Multiply(r_1, p_1, q_1); |
127 | Multiply(r_m1, p_m1, q_m1); |
128 | bool r_m1_sign = p_m1_sign != q_m1_sign; |
129 | |
130 | // Phase 2b: Evaluation, steps m2 and inf. |
131 | // p_m2 = (p_m1 + X2) * 2 - X0 |
132 | RWDigits p_m2 = p_1; |
133 | MARK_INVALID(p_1)(void(0)); |
134 | bool p_m2_sign = AddSigned(p_m2, p_m1, p_m1_sign, X2, false); |
135 | TimesTwo(p_m2); |
136 | p_m2_sign = SubtractSigned(p_m2, p_m2, p_m2_sign, X0, false); |
137 | // p_inf = X2 |
138 | |
139 | // q_m2 = (q_m1 + Y2) * 2 - Y0 |
140 | RWDigits q_m2 = q_1; |
141 | MARK_INVALID(q_1)(void(0)); |
142 | bool q_m2_sign = AddSigned(q_m2, q_m1, q_m1_sign, Y2, false); |
143 | TimesTwo(q_m2); |
144 | q_m2_sign = SubtractSigned(q_m2, q_m2, q_m2_sign, Y0, false); |
145 | // q_inf = Y2 |
146 | |
147 | // Phase 3b: Pointwise multiplication, steps m2 and inf. |
148 | RWDigits r_m2(t, r_len); |
149 | MARK_INVALID(p_m1)(void(0)); |
150 | MARK_INVALID(q_m1)(void(0)); |
151 | Multiply(r_m2, p_m2, q_m2); |
152 | bool r_m2_sign = p_m2_sign != q_m2_sign; |
153 | |
154 | RWDigits r_inf(t + r_len, r_len); |
155 | MARK_INVALID(p_m2)(void(0)); |
156 | MARK_INVALID(q_m2)(void(0)); |
157 | Multiply(r_inf, X2, Y2); |
158 | |
159 | // Phase 4: Interpolation. |
160 | Digits R0 = r_0; |
161 | Digits R4 = r_inf; |
162 | // R3 <- (r_m2 - r_1) / 3 |
163 | RWDigits R3 = r_m2; |
164 | bool R3_sign = SubtractSigned(R3, r_m2, r_m2_sign, r_1, false); |
165 | DivideByThree(R3); |
166 | // R1 <- (r_1 - r_m1) / 2 |
167 | RWDigits R1 = r_1; |
168 | bool R1_sign = SubtractSigned(R1, r_1, false, r_m1, r_m1_sign); |
169 | DivideByTwo(R1); |
170 | // R2 <- r_m1 - r_0 |
171 | RWDigits R2 = r_m1; |
172 | bool R2_sign = SubtractSigned(R2, r_m1, r_m1_sign, R0, false); |
173 | // R3 <- (R2 - R3) / 2 + 2 * r_inf |
174 | R3_sign = SubtractSigned(R3, R2, R2_sign, R3, R3_sign); |
175 | DivideByTwo(R3); |
176 | // TODO(jkummerow): Would it be a measurable improvement to write an |
177 | // "AddTwice" helper? |
178 | R3_sign = AddSigned(R3, R3, R3_sign, r_inf, false); |
179 | R3_sign = AddSigned(R3, R3, R3_sign, r_inf, false); |
180 | // R2 <- R2 + R1 - R4 |
181 | R2_sign = AddSigned(R2, R2, R2_sign, R1, R1_sign); |
182 | R2_sign = SubtractSigned(R2, R2, R2_sign, R4, false); |
183 | // R1 <- R1 - R3 |
184 | R1_sign = SubtractSigned(R1, R1, R1_sign, R3, R3_sign); |
Value stored to 'R1_sign' is never read | |
185 | |
186 | #if DEBUG |
187 | R1.Normalize(); |
188 | R2.Normalize(); |
189 | R3.Normalize(); |
190 | DCHECK(R1_sign == false || R1.len() == 0)(void(0)); |
191 | DCHECK(R2_sign == false || R2.len() == 0)(void(0)); |
192 | DCHECK(R3_sign == false || R3.len() == 0)(void(0)); |
193 | #endif |
194 | |
195 | // Phase 5: Recomposition. R0 is already in place. Overflow can't happen. |
196 | for (int j = R0.len(); j < Z.len(); j++) Z[j] = 0; |
197 | AddAndReturnOverflow(Z + i, R1); |
198 | AddAndReturnOverflow(Z + 2 * i, R2); |
199 | AddAndReturnOverflow(Z + 3 * i, R3); |
200 | AddAndReturnOverflow(Z + 4 * i, R4); |
201 | } |
202 | |
203 | void ProcessorImpl::MultiplyToomCook(RWDigits Z, Digits X, Digits Y) { |
204 | DCHECK(X.len() >= Y.len())(void(0)); |
205 | int k = Y.len(); |
206 | // TODO(jkummerow): Would it be a measurable improvement to share the |
207 | // scratch memory for several invocations? |
208 | Digits X0(X, 0, k); |
209 | Toom3Main(Z, X0, Y); |
210 | if (X.len() > Y.len()) { |
211 | ScratchDigits T(2 * k); |
212 | for (int i = k; i < X.len(); i += k) { |
213 | Digits Xi(X, i, k); |
214 | // TODO(jkummerow): would it be a measurable improvement to craft a |
215 | // "ToomChunk" method in the style of {KaratsubaChunk}? |
216 | Toom3Main(T, Xi, Y); |
217 | AddAndReturnOverflow(Z + i, T); // Can't overflow. |
218 | } |
219 | } |
220 | } |
221 | |
222 | } // namespace bigint |
223 | } // namespace v8 |