Crypto++  8.6
Free C++ class library of cryptographic schemes
lsh512_sse.cpp
// lsh.cpp - written and placed in the public domain by Jeffrey Walton
// Based on the specification and source code provided by the
// Korea Internet & Security Agency (KISA) website. Also
// see https://seed.kisa.or.kr/kisa/algorithm/EgovLSHInfo.do
// and https://seed.kisa.or.kr/kisa/Board/22/detailView.do.

// We are hitting some sort of GCC bug in the LSH AVX2 code path.
// Clang is OK on the AVX2 code path. We believe it is GCC Issue
// 82735 (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735),
// which makes using zeroupper a little tricky.

#include "pch.h"
#include "config.h"

#include "lsh.h"
#include "misc.h"

#if defined(CRYPTOPP_SSSE3_AVAILABLE) && defined(CRYPTOPP_ENABLE_64BIT_SSE)

#if defined(CRYPTOPP_SSSE3_AVAILABLE)
# include <emmintrin.h>
# include <tmmintrin.h>
#endif

#if defined(CRYPTOPP_XOP_AVAILABLE)
# include <ammintrin.h>
#endif

// x86intrin.h is available in GCC 4.5 and above. Clang is unknown.
// Also see https://stackoverflow.com/a/42493893.
#if (CRYPTOPP_GCC_VERSION >= 40500)
# include <x86intrin.h>
#endif
ANONYMOUS_NAMESPACE_BEGIN

/* LSH Constants */

const unsigned int LSH512_MSG_BLK_BYTE_LEN = 256;
// const unsigned int LSH512_MSG_BLK_BIT_LEN = 2048;
// const unsigned int LSH512_CV_BYTE_LEN = 128;
const unsigned int LSH512_HASH_VAL_MAX_BYTE_LEN = 64;

// const unsigned int MSG_BLK_WORD_LEN = 32;
const unsigned int CV_WORD_LEN = 16;
const unsigned int CONST_WORD_LEN = 8;
// const unsigned int HASH_VAL_MAX_WORD_LEN = 8;
const unsigned int NUM_STEPS = 28;

const unsigned int ROT_EVEN_ALPHA = 23;
const unsigned int ROT_EVEN_BETA = 59;
const unsigned int ROT_ODD_ALPHA = 7;
const unsigned int ROT_ODD_BETA = 3;

const unsigned int LSH_TYPE_512_512 = 0x0010040;
const unsigned int LSH_TYPE_512_384 = 0x0010030;
const unsigned int LSH_TYPE_512_256 = 0x0010020;
const unsigned int LSH_TYPE_512_224 = 0x001001C;

// const unsigned int LSH_TYPE_384 = LSH_TYPE_512_384;
// const unsigned int LSH_TYPE_512 = LSH_TYPE_512_512;

/* Error Code */

const unsigned int LSH_SUCCESS = 0x0;
// const unsigned int LSH_ERR_NULL_PTR = 0x2401;
// const unsigned int LSH_ERR_INVALID_ALGTYPE = 0x2402;
const unsigned int LSH_ERR_INVALID_DATABITLEN = 0x2403;
const unsigned int LSH_ERR_INVALID_STATE = 0x2404;

/* Index into our state array */

const unsigned int AlgorithmType = 80;
const unsigned int RemainingBits = 81;
NAMESPACE_END

NAMESPACE_BEGIN(CryptoPP)
NAMESPACE_BEGIN(LSH)

// lsh512.cpp
extern const word64 LSH512_IV224[CV_WORD_LEN];
extern const word64 LSH512_IV256[CV_WORD_LEN];
extern const word64 LSH512_IV384[CV_WORD_LEN];
extern const word64 LSH512_IV512[CV_WORD_LEN];
extern const word64 LSH512_StepConstants[CONST_WORD_LEN * NUM_STEPS];

NAMESPACE_END // LSH
NAMESPACE_END // Crypto++

ANONYMOUS_NAMESPACE_BEGIN

using CryptoPP::byte;
using CryptoPP::word32;
using CryptoPP::word64;
using CryptoPP::rotlFixed;
using CryptoPP::rotlConstant;

using CryptoPP::GetBlock;
using CryptoPP::LittleEndian;
using CryptoPP::ConditionalByteReverse;
using CryptoPP::LITTLE_ENDIAN_ORDER;

using CryptoPP::LSH::LSH512_IV224;
using CryptoPP::LSH::LSH512_IV256;
using CryptoPP::LSH::LSH512_IV384;
using CryptoPP::LSH::LSH512_IV512;
using CryptoPP::LSH::LSH512_StepConstants;

typedef byte lsh_u8;
typedef word32 lsh_u32;
typedef word64 lsh_u64;
typedef word32 lsh_uint;
typedef word32 lsh_err;
typedef word32 lsh_type;

struct LSH512_SSSE3_Context
{
    LSH512_SSSE3_Context(word64* state, word64 algType, word64& remainingBitLength) :
        cv_l(state+0), cv_r(state+8), sub_msgs(state+16),
        last_block(reinterpret_cast<byte*>(state+48)),
        remain_databitlen(remainingBitLength),
        alg_type(static_cast<lsh_type>(algType)) {}

    lsh_u64* cv_l;  // start of our state block
    lsh_u64* cv_r;
    lsh_u64* sub_msgs;
    lsh_u8*  last_block;
    lsh_u64& remain_databitlen;
    lsh_type alg_type;
};

struct LSH512_SSSE3_Internal
{
    LSH512_SSSE3_Internal(word64* state) :
        submsg_e_l(state+16), submsg_e_r(state+24),
        submsg_o_l(state+32), submsg_o_r(state+40) { }

    lsh_u64* submsg_e_l; /* even left sub-message */
    lsh_u64* submsg_e_r; /* even right sub-message */
    lsh_u64* submsg_o_l; /* odd left sub-message */
    lsh_u64* submsg_o_r; /* odd right sub-message */
};

// const lsh_u32 g_gamma512[8] = { 0, 16, 32, 48, 8, 24, 40, 56 };

/* LSH AlgType Macro */

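// A note on the alg_type encoding, inferred from the constants above
// and the accessors below: bits 0..15 hold the digest length in bytes
// (e.g., 0x40 = 64 bytes for LSH-512-512), bits 16..19 identify the
// LSH-512 family, and bits 24..31 carry the "small hash" bit count
// used to mask the final byte of truncated digests.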
inline bool LSH_IS_LSH512(lsh_uint val) {
    return (val & 0xf0000) == 0x10000;
}

inline lsh_uint LSH_GET_SMALL_HASHBIT(lsh_uint val) {
    return val >> 24;
}

inline lsh_uint LSH_GET_HASHBYTE(lsh_uint val) {
    return val & 0xffff;
}

inline lsh_uint LSH_GET_HASHBIT(lsh_uint val) {
    return (LSH_GET_HASHBYTE(val) << 3) - LSH_GET_SMALL_HASHBIT(val);
}

inline lsh_u64 loadLE64(lsh_u64 v) {
    return ConditionalByteReverse(LITTLE_ENDIAN_ORDER, v);
}

lsh_u64 ROTL64(lsh_u64 x, lsh_u32 r) {
    return rotlFixed(x, r);
}

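// load_msg_blk() copies the 256-byte message block into the four
// 64-byte sub-message arrays (even/odd x left/right), 16 bytes at a
// time, using unaligned 128-bit loads and stores.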
// Original code relied upon an unaligned lsh_u64 buffer
inline void load_msg_blk(LSH512_SSSE3_Internal* i_state, const lsh_u8 msgblk[LSH512_MSG_BLK_BYTE_LEN])
{
    lsh_u64* submsg_e_l = i_state->submsg_e_l;
    lsh_u64* submsg_e_r = i_state->submsg_e_r;
    lsh_u64* submsg_o_l = i_state->submsg_o_l;
    lsh_u64* submsg_o_r = i_state->submsg_o_r;

    _mm_storeu_si128(M128_CAST(submsg_e_l+0),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+0)));
    _mm_storeu_si128(M128_CAST(submsg_e_l+2),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+16)));
    _mm_storeu_si128(M128_CAST(submsg_e_l+4),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+32)));
    _mm_storeu_si128(M128_CAST(submsg_e_l+6),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+48)));

    _mm_storeu_si128(M128_CAST(submsg_e_r+0),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+64)));
    _mm_storeu_si128(M128_CAST(submsg_e_r+2),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+80)));
    _mm_storeu_si128(M128_CAST(submsg_e_r+4),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+96)));
    _mm_storeu_si128(M128_CAST(submsg_e_r+6),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+112)));

    _mm_storeu_si128(M128_CAST(submsg_o_l+0),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+128)));
    _mm_storeu_si128(M128_CAST(submsg_o_l+2),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+144)));
    _mm_storeu_si128(M128_CAST(submsg_o_l+4),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+160)));
    _mm_storeu_si128(M128_CAST(submsg_o_l+6),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+176)));

    _mm_storeu_si128(M128_CAST(submsg_o_r+0),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+192)));
    _mm_storeu_si128(M128_CAST(submsg_o_r+2),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+208)));
    _mm_storeu_si128(M128_CAST(submsg_o_r+4),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+224)));
    _mm_storeu_si128(M128_CAST(submsg_o_r+6),
        _mm_loadu_si128(CONST_M128_CAST(msgblk+240)));
}

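// Message expansion. msg_exp_even() permutes the even sub-messages in
// place (the shuffle/unpack sequences below) and then adds the odd
// sub-messages into them with lane-wise 64-bit additions; msg_exp_odd()
// does the converse. Together they derive each step's sub-message from
// the two preceding ones.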
inline void msg_exp_even(LSH512_SSSE3_Internal* i_state)
{
    CRYPTOPP_ASSERT(i_state != NULLPTR);

    lsh_u64* submsg_e_l = i_state->submsg_e_l;
    lsh_u64* submsg_e_r = i_state->submsg_e_r;
    lsh_u64* submsg_o_l = i_state->submsg_o_l;
    lsh_u64* submsg_o_r = i_state->submsg_o_r;

    __m128i temp;
    _mm_storeu_si128(M128_CAST(submsg_e_l+2), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2)), _MM_SHUFFLE(1,0,3,2)));

    temp = _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0));
    _mm_storeu_si128(M128_CAST(submsg_e_l+0),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2)));
    _mm_storeu_si128(M128_CAST(submsg_e_l+2), temp);
    _mm_storeu_si128(M128_CAST(submsg_e_l+6), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6)), _MM_SHUFFLE(1,0,3,2)));

    temp = _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4));
    _mm_storeu_si128(M128_CAST(submsg_e_l+4), _mm_unpacklo_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4))));
    _mm_storeu_si128(M128_CAST(submsg_e_l+6), _mm_unpackhi_epi64(
        temp, _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6))));
    _mm_storeu_si128(M128_CAST(submsg_e_r+2), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2)), _MM_SHUFFLE(1,0,3,2)));

    temp = _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0));
    _mm_storeu_si128(M128_CAST(submsg_e_r+0),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2)));
    _mm_storeu_si128(M128_CAST(submsg_e_r+2), temp);
    _mm_storeu_si128(M128_CAST(submsg_e_r+6), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6)), _MM_SHUFFLE(1,0,3,2)));

    temp = _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4));
    _mm_storeu_si128(M128_CAST(submsg_e_r+4), _mm_unpacklo_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4))));
    _mm_storeu_si128(M128_CAST(submsg_e_r+6), _mm_unpackhi_epi64(
        temp, _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6))));

    _mm_storeu_si128(M128_CAST(submsg_e_l+0), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0))));
    _mm_storeu_si128(M128_CAST(submsg_e_l+2), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2))));
    _mm_storeu_si128(M128_CAST(submsg_e_l+4), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4))));
    _mm_storeu_si128(M128_CAST(submsg_e_l+6), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6))));

    _mm_storeu_si128(M128_CAST(submsg_e_r+0), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0))));
    _mm_storeu_si128(M128_CAST(submsg_e_r+2), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2))));
    _mm_storeu_si128(M128_CAST(submsg_e_r+4), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4))));
    _mm_storeu_si128(M128_CAST(submsg_e_r+6), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6))));
}

inline void msg_exp_odd(LSH512_SSSE3_Internal* i_state)
{
    CRYPTOPP_ASSERT(i_state != NULLPTR);

    lsh_u64* submsg_e_l = i_state->submsg_e_l;
    lsh_u64* submsg_e_r = i_state->submsg_e_r;
    lsh_u64* submsg_o_l = i_state->submsg_o_l;
    lsh_u64* submsg_o_r = i_state->submsg_o_r;

    __m128i temp;
    _mm_storeu_si128(M128_CAST(submsg_o_l+2), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2)), _MM_SHUFFLE(1,0,3,2)));

    temp = _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0));
    _mm_storeu_si128(M128_CAST(submsg_o_l+0),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2)));
    _mm_storeu_si128(M128_CAST(submsg_o_l+2), temp);
    _mm_storeu_si128(M128_CAST(submsg_o_l+6), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6)), _MM_SHUFFLE(1,0,3,2)));

    temp = _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4));
    _mm_storeu_si128(M128_CAST(submsg_o_l+4), _mm_unpacklo_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4))));
    _mm_storeu_si128(M128_CAST(submsg_o_l+6), _mm_unpackhi_epi64(
        temp, _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6))));
    _mm_storeu_si128(M128_CAST(submsg_o_r+2), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2)), _MM_SHUFFLE(1,0,3,2)));

    temp = _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0));
    _mm_storeu_si128(M128_CAST(submsg_o_r+0),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2)));
    _mm_storeu_si128(M128_CAST(submsg_o_r+2), temp);
    _mm_storeu_si128(M128_CAST(submsg_o_r+6), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6)), _MM_SHUFFLE(1,0,3,2)));

    temp = _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4));
    _mm_storeu_si128(M128_CAST(submsg_o_r+4), _mm_unpacklo_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4))));
    _mm_storeu_si128(M128_CAST(submsg_o_r+6), _mm_unpackhi_epi64(
        temp, _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6))));

    _mm_storeu_si128(M128_CAST(submsg_o_l+0), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0))));
    _mm_storeu_si128(M128_CAST(submsg_o_l+2), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2))));
    _mm_storeu_si128(M128_CAST(submsg_o_l+4), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4))));
    _mm_storeu_si128(M128_CAST(submsg_o_l+6), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6))));

    _mm_storeu_si128(M128_CAST(submsg_o_r+0), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0))));
    _mm_storeu_si128(M128_CAST(submsg_o_r+2), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2))));
    _mm_storeu_si128(M128_CAST(submsg_o_r+4), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4))));
    _mm_storeu_si128(M128_CAST(submsg_o_r+6), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6))));
}

inline void load_sc(const lsh_u64** p_const_v, size_t i)
{
    *p_const_v = &LSH512_StepConstants[i];
}

inline void msg_add_even(lsh_u64 cv_l[8], lsh_u64 cv_r[8], LSH512_SSSE3_Internal* i_state)
{
    CRYPTOPP_ASSERT(i_state != NULLPTR);

    lsh_u64* submsg_e_l = i_state->submsg_e_l;
    lsh_u64* submsg_e_r = i_state->submsg_e_r;

    _mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(cv_l)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l))));
    _mm_storeu_si128(M128_CAST(cv_r), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(cv_r)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r))));
    _mm_storeu_si128(M128_CAST(cv_l+2), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2))));
    _mm_storeu_si128(M128_CAST(cv_r+2), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(cv_r+2)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2))));
    _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4))));
    _mm_storeu_si128(M128_CAST(cv_r+4), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4))));
    _mm_storeu_si128(M128_CAST(cv_l+6), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6))));
    _mm_storeu_si128(M128_CAST(cv_r+6), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(cv_r+6)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6))));
}

inline void msg_add_odd(lsh_u64 cv_l[8], lsh_u64 cv_r[8], LSH512_SSSE3_Internal* i_state)
{
    CRYPTOPP_ASSERT(i_state != NULLPTR);

    lsh_u64* submsg_o_l = i_state->submsg_o_l;
    lsh_u64* submsg_o_r = i_state->submsg_o_r;

    _mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(cv_l)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l))));
    _mm_storeu_si128(M128_CAST(cv_r), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(cv_r)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r))));
    _mm_storeu_si128(M128_CAST(cv_l+2), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2))));
    _mm_storeu_si128(M128_CAST(cv_r+2), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(cv_r+2)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2))));
    _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4))));
    _mm_storeu_si128(M128_CAST(cv_r+4), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4))));
    _mm_storeu_si128(M128_CAST(cv_l+6), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6))));
    _mm_storeu_si128(M128_CAST(cv_r+6), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(cv_r+6)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6))));
}

inline void add_blk(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
{
    _mm_storeu_si128(M128_CAST(cv_l), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(cv_l)),
        _mm_loadu_si128(CONST_M128_CAST(cv_r))));
    _mm_storeu_si128(M128_CAST(cv_l+2), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
        _mm_loadu_si128(CONST_M128_CAST(cv_r+2))));
    _mm_storeu_si128(M128_CAST(cv_l+4), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
        _mm_loadu_si128(CONST_M128_CAST(cv_r+4))));
    _mm_storeu_si128(M128_CAST(cv_l+6), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
        _mm_loadu_si128(CONST_M128_CAST(cv_r+6))));
}

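// rotate_blk() rotates each 64-bit word of cv left by R bits. XOP
// provides a native rotate (_mm_roti_epi64); otherwise the rotate is
// composed from a left shift, a right shift and an OR, since SSE2
// has no 64-bit rotate instruction.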
template <unsigned int R>
inline void rotate_blk(lsh_u64 cv[8])
{
#if defined(CRYPTOPP_XOP_AVAILABLE)
    _mm_storeu_si128(M128_CAST(cv),
        _mm_roti_epi64(_mm_loadu_si128(CONST_M128_CAST(cv)), R));
    _mm_storeu_si128(M128_CAST(cv+2),
        _mm_roti_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+2)), R));
    _mm_storeu_si128(M128_CAST(cv+4),
        _mm_roti_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+4)), R));
    _mm_storeu_si128(M128_CAST(cv+6),
        _mm_roti_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+6)), R));

#else
    _mm_storeu_si128(M128_CAST(cv), _mm_or_si128(
        _mm_slli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv)), R),
        _mm_srli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv)), 64-R)));
    _mm_storeu_si128(M128_CAST(cv+2), _mm_or_si128(
        _mm_slli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+2)), R),
        _mm_srli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+2)), 64-R)));
    _mm_storeu_si128(M128_CAST(cv+4), _mm_or_si128(
        _mm_slli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+4)), R),
        _mm_srli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+4)), 64-R)));
    _mm_storeu_si128(M128_CAST(cv+6), _mm_or_si128(
        _mm_slli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+6)), R),
        _mm_srli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+6)), 64-R)));
#endif
}

inline void xor_with_const(lsh_u64 cv_l[8], const lsh_u64 const_v[8])
{
    _mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(cv_l)),
        _mm_loadu_si128(CONST_M128_CAST(const_v))));
    _mm_storeu_si128(M128_CAST(cv_l+2), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
        _mm_loadu_si128(CONST_M128_CAST(const_v+2))));
    _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
        _mm_loadu_si128(CONST_M128_CAST(const_v+4))));
    _mm_storeu_si128(M128_CAST(cv_l+6), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
        _mm_loadu_si128(CONST_M128_CAST(const_v+6))));
}

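// The gamma rotation amounts are { 0, 16, 32, 48, 8, 24, 40, 56 } bits.
// All are multiples of 8, so each 64-bit rotate can be expressed as a
// byte permutation, and each pair of words is handled with a single
// _mm_shuffle_epi8 (SSSE3) byte shuffle below.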
inline void rotate_msg_gamma(lsh_u64 cv_r[8])
{
    // g_gamma512[8] = { 0, 16, 32, 48, 8, 24, 40, 56 };
    _mm_storeu_si128(M128_CAST(cv_r+0),
        _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+0)),
            _mm_set_epi8(13,12,11,10, 9,8,15,14, 7,6,5,4, 3,2,1,0)));
    _mm_storeu_si128(M128_CAST(cv_r+2),
        _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+2)),
            _mm_set_epi8(9,8,15,14, 13,12,11,10, 3,2,1,0, 7,6,5,4)));

    _mm_storeu_si128(M128_CAST(cv_r+4),
        _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
            _mm_set_epi8(12,11,10,9, 8,15,14,13, 6,5,4,3, 2,1,0,7)));
    _mm_storeu_si128(M128_CAST(cv_r+6),
        _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+6)),
            _mm_set_epi8(8,15,14,13, 12,11,10,9, 2,1,0,7, 6,5,4,3)));
}

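// word_perm() applies the LSH word permutation to the sixteen 64-bit
// words of the chaining variable: it first permutes words within each
// half, then swaps four-word groups between cv_l and cv_r.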
inline void word_perm(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
{
    __m128i temp[2];
    temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_l+0));
    _mm_storeu_si128(M128_CAST(cv_l+0), _mm_unpacklo_epi64(
        _mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
        _mm_loadu_si128(CONST_M128_CAST(cv_l+0))));
    _mm_storeu_si128(M128_CAST(cv_l+2), _mm_unpackhi_epi64(
        temp[0], _mm_loadu_si128(CONST_M128_CAST(cv_l+2))));

    temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_l+4));
    _mm_storeu_si128(M128_CAST(cv_l+4), _mm_unpacklo_epi64(
        _mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
        _mm_loadu_si128(CONST_M128_CAST(cv_l+4))));
    _mm_storeu_si128(M128_CAST(cv_l+6), _mm_unpackhi_epi64(
        temp[0], _mm_loadu_si128(CONST_M128_CAST(cv_l+6))));
    _mm_storeu_si128(M128_CAST(cv_r+2), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(cv_r+2)), _MM_SHUFFLE(1,0,3,2)));

    temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_r+0));
    _mm_storeu_si128(M128_CAST(cv_r+0), _mm_unpacklo_epi64(
        _mm_loadu_si128(CONST_M128_CAST(cv_r+0)),
        _mm_loadu_si128(CONST_M128_CAST(cv_r+2))));
    _mm_storeu_si128(M128_CAST(cv_r+2), _mm_unpackhi_epi64(
        _mm_loadu_si128(CONST_M128_CAST(cv_r+2)), temp[0]));
    _mm_storeu_si128(M128_CAST(cv_r+6), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(cv_r+6)), _MM_SHUFFLE(1,0,3,2)));

    temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_r+4));
    _mm_storeu_si128(M128_CAST(cv_r+4), _mm_unpacklo_epi64(
        _mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
        _mm_loadu_si128(CONST_M128_CAST(cv_r+6))));
    _mm_storeu_si128(M128_CAST(cv_r+6), _mm_unpackhi_epi64(
        _mm_loadu_si128(CONST_M128_CAST(cv_r+6)), temp[0]));

    temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_l+0));
    temp[1] = _mm_loadu_si128(CONST_M128_CAST(cv_l+2));

    _mm_storeu_si128(M128_CAST(cv_l+0),
        _mm_loadu_si128(CONST_M128_CAST(cv_l+4)));
    _mm_storeu_si128(M128_CAST(cv_l+2),
        _mm_loadu_si128(CONST_M128_CAST(cv_l+6)));
    _mm_storeu_si128(M128_CAST(cv_l+4),
        _mm_loadu_si128(CONST_M128_CAST(cv_r+4)));
    _mm_storeu_si128(M128_CAST(cv_l+6),
        _mm_loadu_si128(CONST_M128_CAST(cv_r+6)));
    _mm_storeu_si128(M128_CAST(cv_r+4),
        _mm_loadu_si128(CONST_M128_CAST(cv_r+0)));
    _mm_storeu_si128(M128_CAST(cv_r+6),
        _mm_loadu_si128(CONST_M128_CAST(cv_r+2)));

    _mm_storeu_si128(M128_CAST(cv_r+0), temp[0]);
    _mm_storeu_si128(M128_CAST(cv_r+2), temp[1]);
}

/* -------------------------------------------------------- *
 * step function
 * -------------------------------------------------------- */

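// One LSH mix step: add the right half into the left, rotate the left
// words by Alpha, XOR in the step constants, add the left half into the
// right, rotate the right words by Beta, add again, and finally apply
// the gamma byte-rotations to the right half.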
template <unsigned int Alpha, unsigned int Beta>
inline void mix(lsh_u64 cv_l[8], lsh_u64 cv_r[8], const lsh_u64 const_v[8])
{
    add_blk(cv_l, cv_r);
    rotate_blk<Alpha>(cv_l);
    xor_with_const(cv_l, const_v);
    add_blk(cv_r, cv_l);
    rotate_blk<Beta>(cv_r);
    add_blk(cv_l, cv_r);
    rotate_msg_gamma(cv_r);
}

/* -------------------------------------------------------- *
 * compression function
 * -------------------------------------------------------- */

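// compress() is the full compression function: load the 256-byte
// message block, then alternate message addition, mix and word
// permutation for NUM_STEPS (28) steps, expanding the sub-messages
// between steps, and finish with a final message addition.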
inline void compress(LSH512_SSSE3_Context* ctx, const lsh_u8 pdMsgBlk[LSH512_MSG_BLK_BYTE_LEN])
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);

    LSH512_SSSE3_Internal s_state(ctx->cv_l);
    LSH512_SSSE3_Internal* i_state = &s_state;

    const lsh_u64* const_v = NULL;
    lsh_u64 *cv_l = ctx->cv_l;
    lsh_u64 *cv_r = ctx->cv_r;

    load_msg_blk(i_state, pdMsgBlk);

    msg_add_even(cv_l, cv_r, i_state);
    load_sc(&const_v, 0);
    mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
    word_perm(cv_l, cv_r);

    msg_add_odd(cv_l, cv_r, i_state);
    load_sc(&const_v, 8);
    mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
    word_perm(cv_l, cv_r);

    for (size_t i = 1; i < NUM_STEPS / 2; i++)
    {
        msg_exp_even(i_state);
        msg_add_even(cv_l, cv_r, i_state);
        load_sc(&const_v, 16 * i);
        mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
        word_perm(cv_l, cv_r);

        msg_exp_odd(i_state);
        msg_add_odd(cv_l, cv_r, i_state);
        load_sc(&const_v, 16 * i + 8);
        mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
        word_perm(cv_l, cv_r);
    }

    msg_exp_even(i_state);
    msg_add_even(cv_l, cv_r, i_state);
}

/* -------------------------------------------------------- */

inline void load_iv(word64 cv_l[8], word64 cv_r[8], const word64 iv[16])
{
    // The IVs are 32-byte aligned so we can use aligned loads.
    _mm_storeu_si128(M128_CAST(cv_l+0),
        _mm_load_si128(CONST_M128_CAST(iv+0)));
    _mm_storeu_si128(M128_CAST(cv_l+2),
        _mm_load_si128(CONST_M128_CAST(iv+2)));
    _mm_storeu_si128(M128_CAST(cv_l+4),
        _mm_load_si128(CONST_M128_CAST(iv+4)));
    _mm_storeu_si128(M128_CAST(cv_l+6),
        _mm_load_si128(CONST_M128_CAST(iv+6)));
    _mm_storeu_si128(M128_CAST(cv_r+0),
        _mm_load_si128(CONST_M128_CAST(iv+8)));
    _mm_storeu_si128(M128_CAST(cv_r+2),
        _mm_load_si128(CONST_M128_CAST(iv+10)));
    _mm_storeu_si128(M128_CAST(cv_r+4),
        _mm_load_si128(CONST_M128_CAST(iv+12)));
    _mm_storeu_si128(M128_CAST(cv_r+6),
        _mm_load_si128(CONST_M128_CAST(iv+14)));
}

inline void zero_iv(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
{
    _mm_storeu_si128(M128_CAST(cv_l+0), _mm_setzero_si128());
    _mm_storeu_si128(M128_CAST(cv_l+2), _mm_setzero_si128());
    _mm_storeu_si128(M128_CAST(cv_l+4), _mm_setzero_si128());
    _mm_storeu_si128(M128_CAST(cv_l+6), _mm_setzero_si128());
    _mm_storeu_si128(M128_CAST(cv_r+0), _mm_setzero_si128());
    _mm_storeu_si128(M128_CAST(cv_r+2), _mm_setzero_si128());
    _mm_storeu_si128(M128_CAST(cv_r+4), _mm_setzero_si128());
    _mm_storeu_si128(M128_CAST(cv_r+6), _mm_setzero_si128());
}

inline void zero_submsgs(LSH512_SSSE3_Context* ctx)
{
    lsh_u64* sub_msgs = ctx->sub_msgs;

    _mm_storeu_si128(M128_CAST(sub_msgs+ 0), _mm_setzero_si128());
    _mm_storeu_si128(M128_CAST(sub_msgs+ 2), _mm_setzero_si128());
    _mm_storeu_si128(M128_CAST(sub_msgs+ 4), _mm_setzero_si128());
    _mm_storeu_si128(M128_CAST(sub_msgs+ 6), _mm_setzero_si128());
    _mm_storeu_si128(M128_CAST(sub_msgs+ 8), _mm_setzero_si128());
    _mm_storeu_si128(M128_CAST(sub_msgs+10), _mm_setzero_si128());
    _mm_storeu_si128(M128_CAST(sub_msgs+12), _mm_setzero_si128());
    _mm_storeu_si128(M128_CAST(sub_msgs+14), _mm_setzero_si128());
}

inline void init224(LSH512_SSSE3_Context* ctx)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);

    zero_submsgs(ctx);
    load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV224);
}

inline void init256(LSH512_SSSE3_Context* ctx)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);

    zero_submsgs(ctx);
    load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV256);
}

inline void init384(LSH512_SSSE3_Context* ctx)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);

    zero_submsgs(ctx);
    load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV384);
}

inline void init512(LSH512_SSSE3_Context* ctx)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);

    zero_submsgs(ctx);
    load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV512);
}

/* -------------------------------------------------------- */

inline void fin(LSH512_SSSE3_Context* ctx)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);

    _mm_storeu_si128(M128_CAST(ctx->cv_l+0), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+0)),
        _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+0))));
    _mm_storeu_si128(M128_CAST(ctx->cv_l+2), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+2)),
        _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+2))));
    _mm_storeu_si128(M128_CAST(ctx->cv_l+4), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+4)),
        _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+4))));
    _mm_storeu_si128(M128_CAST(ctx->cv_l+6), _mm_xor_si128(
        _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+6)),
        _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+6))));
}

/* -------------------------------------------------------- */

inline void get_hash(LSH512_SSSE3_Context* ctx, lsh_u8* pbHashVal)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);
    CRYPTOPP_ASSERT(ctx->alg_type != 0);
    CRYPTOPP_ASSERT(pbHashVal != NULLPTR);

    lsh_uint alg_type = ctx->alg_type;
    lsh_uint hash_val_byte_len = LSH_GET_HASHBYTE(alg_type);
    lsh_uint hash_val_bit_len = LSH_GET_SMALL_HASHBIT(alg_type);

    // Multiplying by sizeof(lsh_u8) looks odd...
    memcpy(pbHashVal, ctx->cv_l, hash_val_byte_len);
    if (hash_val_bit_len){
        pbHashVal[hash_val_byte_len-1] &= (((lsh_u8)0xff) << hash_val_bit_len);
    }
}

/* -------------------------------------------------------- */

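// lsh512_init_ssse3() handles the four standard digest sizes with
// precomputed IVs. For any other (truncated) digest size, it derives
// the IV on the fly: zero the chaining variable, seed it with the
// maximum digest byte length and the requested bit length, and run the
// mix/word-perm steps once, mirroring the IV generation in the
// reference implementation.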
lsh_err lsh512_init_ssse3(LSH512_SSSE3_Context* ctx)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);
    CRYPTOPP_ASSERT(ctx->alg_type != 0);

    lsh_u32 alg_type = ctx->alg_type;
    const lsh_u64* const_v = NULL;
    ctx->remain_databitlen = 0;

    switch (alg_type){
    case LSH_TYPE_512_512:
        init512(ctx);
        return LSH_SUCCESS;
    case LSH_TYPE_512_384:
        init384(ctx);
        return LSH_SUCCESS;
    case LSH_TYPE_512_256:
        init256(ctx);
        return LSH_SUCCESS;
    case LSH_TYPE_512_224:
        init224(ctx);
        return LSH_SUCCESS;
    default:
        break;
    }

    lsh_u64* cv_l = ctx->cv_l;
    lsh_u64* cv_r = ctx->cv_r;

    zero_iv(cv_l, cv_r);
    cv_l[0] = LSH512_HASH_VAL_MAX_BYTE_LEN;
    cv_l[1] = LSH_GET_HASHBIT(alg_type);

    for (size_t i = 0; i < NUM_STEPS / 2; i++)
    {
        // Mix
        load_sc(&const_v, i * 16);
        mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
        word_perm(cv_l, cv_r);

        load_sc(&const_v, i * 16 + 8);
        mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
        word_perm(cv_l, cv_r);
    }

    return LSH_SUCCESS;
}

lsh_err lsh512_update_ssse3(LSH512_SSSE3_Context* ctx, const lsh_u8* data, size_t databitlen)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);
    CRYPTOPP_ASSERT(data != NULLPTR);
    CRYPTOPP_ASSERT(databitlen % 8 == 0);
    CRYPTOPP_ASSERT(ctx->alg_type != 0);

    if (databitlen == 0){
        return LSH_SUCCESS;
    }

    // We are byte oriented. Tail bits will always be 0.
    size_t databytelen = databitlen >> 3;
    // lsh_uint pos2 = databitlen & 0x7;
    const size_t pos2 = 0;

    size_t remain_msg_byte = static_cast<size_t>(ctx->remain_databitlen >> 3);
    // lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
    const size_t remain_msg_bit = 0;

    if (remain_msg_byte >= LSH512_MSG_BLK_BYTE_LEN){
        return LSH_ERR_INVALID_STATE;
    }
    if (remain_msg_bit > 0){
        return LSH_ERR_INVALID_DATABITLEN;
    }

    if (databytelen + remain_msg_byte < LSH512_MSG_BLK_BYTE_LEN){
        memcpy(ctx->last_block + remain_msg_byte, data, databytelen);
        ctx->remain_databitlen += (lsh_uint)databitlen;
        remain_msg_byte += (lsh_uint)databytelen;
        if (pos2){
            ctx->last_block[remain_msg_byte] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
        }
        return LSH_SUCCESS;
    }

    if (remain_msg_byte > 0){
        size_t more_byte = LSH512_MSG_BLK_BYTE_LEN - remain_msg_byte;
        memcpy(ctx->last_block + remain_msg_byte, data, more_byte);
        compress(ctx, ctx->last_block);
        data += more_byte;
        databytelen -= more_byte;
        remain_msg_byte = 0;
        ctx->remain_databitlen = 0;
    }

    while (databytelen >= LSH512_MSG_BLK_BYTE_LEN)
    {
        // This call to compress() caused some trouble:
        // the data pointer can become unaligned in the
        // previous block.
        compress(ctx, data);
        data += LSH512_MSG_BLK_BYTE_LEN;
        databytelen -= LSH512_MSG_BLK_BYTE_LEN;
    }

    if (databytelen > 0){
        memcpy(ctx->last_block, data, databytelen);
        ctx->remain_databitlen = (lsh_uint)(databytelen << 3);
    }

    if (pos2){
        ctx->last_block[databytelen] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
        ctx->remain_databitlen += pos2;
    }
    return LSH_SUCCESS;
}

lsh_err lsh512_final_ssse3(LSH512_SSSE3_Context* ctx, lsh_u8* hashval)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);
    CRYPTOPP_ASSERT(hashval != NULLPTR);

    // We are byte oriented. Tail bits will always be 0.
    size_t remain_msg_byte = static_cast<size_t>(ctx->remain_databitlen >> 3);
    // lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
    const size_t remain_msg_bit = 0;

    if (remain_msg_byte >= LSH512_MSG_BLK_BYTE_LEN){
        return LSH_ERR_INVALID_STATE;
    }

    if (remain_msg_bit){
        ctx->last_block[remain_msg_byte] |= (0x1 << (7 - remain_msg_bit));
    }
    else{
        ctx->last_block[remain_msg_byte] = 0x80;
    }
    memset(ctx->last_block + remain_msg_byte + 1, 0, LSH512_MSG_BLK_BYTE_LEN - remain_msg_byte - 1);

    compress(ctx, ctx->last_block);

    fin(ctx);
    get_hash(ctx, hashval);

    return LSH_SUCCESS;
}

ANONYMOUS_NAMESPACE_END

NAMESPACE_BEGIN(CryptoPP)

extern
void LSH512_Base_Restart_SSSE3(word64* state)
{
    state[RemainingBits] = 0;
    LSH512_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
    lsh_err err = lsh512_init_ssse3(&ctx);

    if (err != LSH_SUCCESS)
        throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_init_ssse3 failed");
}

extern
void LSH512_Base_Update_SSSE3(word64* state, const byte *input, size_t size)
{
    LSH512_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
    lsh_err err = lsh512_update_ssse3(&ctx, input, 8*size);

    if (err != LSH_SUCCESS)
        throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_update_ssse3 failed");
}

extern
void LSH512_Base_TruncatedFinal_SSSE3(word64* state, byte *hash, size_t)
{
    LSH512_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
    lsh_err err = lsh512_final_ssse3(&ctx, hash);

    if (err != LSH_SUCCESS)
        throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_final_ssse3 failed");
}

NAMESPACE_END

#endif  // CRYPTOPP_SSSE3_AVAILABLE && CRYPTOPP_ENABLE_64BIT_SSE