2C2EF2GKTOAT7QI56KKDFGRGSFZRDPKUASNQFB6XQDROAEPPZW4AC VKLGQREYOZDV46F672RFE5XJO3OEOP4EHTCWZYOJY24HVPQX3L6QC PEUS54XQ5KJQYAVUYBG5MWLEHIOVPMZ3ANVC7HPQP6JUWWPRDW5AC X36ICMJNYKJF35ZUEVCCR33JOZPCPZQ6KSGEQI3RZND2P5EDASLQC RIWSVVASWLJQQTSVRHIIUPENOZWOMHQLZMTQVGJUS2ZUGDPSWWIQC Q7TKZCJP2Z75EICZYKCEZDHKGERSOKZGMTSU3UXETBHTF663T66AC 3NA345CN3HKNUQOWTUMUTINQMFLYATWPO4H74J4AFGEUQKGQYBWQC B3XLVPNC4COLLC3FUE34Y7HIKTMF6CJZUASZOU3YM2YGPZKJZP7QC 3OHR6ZPHN53SVWJL4GUKKUC223IPXA73UPBFJCM4ENGS27AKHN6AC #include "blake3_impl.h"#include <string.h>INLINE uint32_t rotr32(uint32_t w, uint32_t c) {return (w >> c) | (w << (32 - c));}INLINE void g(uint32_t *state, size_t a, size_t b, size_t c, size_t d,uint32_t x, uint32_t y) {state[a] = state[a] + state[b] + x;state[d] = rotr32(state[d] ^ state[a], 16);state[c] = state[c] + state[d];state[b] = rotr32(state[b] ^ state[c], 12);state[a] = state[a] + state[b] + y;state[d] = rotr32(state[d] ^ state[a], 8);state[c] = state[c] + state[d];state[b] = rotr32(state[b] ^ state[c], 7);}INLINE void round_fn(uint32_t state[16], const uint32_t *msg, size_t round) {// Select the message schedule based on the round.const uint8_t *schedule = MSG_SCHEDULE[round];// Mix the columns.g(state, 0, 4, 8, 12, msg[schedule[0]], msg[schedule[1]]);g(state, 1, 5, 9, 13, msg[schedule[2]], msg[schedule[3]]);g(state, 2, 6, 10, 14, msg[schedule[4]], msg[schedule[5]]);g(state, 3, 7, 11, 15, msg[schedule[6]], msg[schedule[7]]);// Mix the rows.g(state, 0, 5, 10, 15, msg[schedule[8]], msg[schedule[9]]);g(state, 1, 6, 11, 12, msg[schedule[10]], msg[schedule[11]]);g(state, 2, 7, 8, 13, msg[schedule[12]], msg[schedule[13]]);g(state, 3, 4, 9, 14, msg[schedule[14]], msg[schedule[15]]);}INLINE void compress_pre(uint32_t state[16], const uint32_t cv[8],const uint8_t block[BLAKE3_BLOCK_LEN],uint8_t block_len, uint64_t counter, uint8_t flags) {uint32_t block_words[16];block_words[0] = load32(block + 4 * 0);block_words[1] = load32(block + 4 * 1);block_words[2] = load32(block + 4 * 
2);block_words[3] = load32(block + 4 * 3);block_words[4] = load32(block + 4 * 4);block_words[5] = load32(block + 4 * 5);block_words[6] = load32(block + 4 * 6);block_words[7] = load32(block + 4 * 7);block_words[8] = load32(block + 4 * 8);block_words[9] = load32(block + 4 * 9);block_words[10] = load32(block + 4 * 10);block_words[11] = load32(block + 4 * 11);block_words[12] = load32(block + 4 * 12);block_words[13] = load32(block + 4 * 13);block_words[14] = load32(block + 4 * 14);block_words[15] = load32(block + 4 * 15);state[0] = cv[0];state[1] = cv[1];state[2] = cv[2];state[3] = cv[3];state[4] = cv[4];state[5] = cv[5];state[6] = cv[6];state[7] = cv[7];state[8] = IV[0];state[9] = IV[1];state[10] = IV[2];state[11] = IV[3];state[12] = counter_low(counter);state[13] = counter_high(counter);state[14] = (uint32_t)block_len;state[15] = (uint32_t)flags;round_fn(state, &block_words[0], 0);round_fn(state, &block_words[0], 1);round_fn(state, &block_words[0], 2);round_fn(state, &block_words[0], 3);round_fn(state, &block_words[0], 4);round_fn(state, &block_words[0], 5);round_fn(state, &block_words[0], 6);}void blake3_compress_in_place_portable(uint32_t cv[8],const uint8_t block[BLAKE3_BLOCK_LEN],uint8_t block_len, uint64_t counter,uint8_t flags) {uint32_t state[16];compress_pre(state, cv, block, block_len, counter, flags);cv[0] = state[0] ^ state[8];cv[1] = state[1] ^ state[9];cv[2] = state[2] ^ state[10];cv[3] = state[3] ^ state[11];cv[4] = state[4] ^ state[12];cv[5] = state[5] ^ state[13];cv[6] = state[6] ^ state[14];cv[7] = state[7] ^ state[15];}void blake3_compress_xof_portable(const uint32_t cv[8],const uint8_t block[BLAKE3_BLOCK_LEN],uint8_t block_len, uint64_t counter,uint8_t flags, uint8_t out[64]) {uint32_t state[16];compress_pre(state, cv, block, block_len, counter, flags);store32(&out[0 * 4], state[0] ^ state[8]);store32(&out[1 * 4], state[1] ^ state[9]);store32(&out[2 * 4], state[2] ^ state[10]);store32(&out[3 * 4], state[3] ^ state[11]);store32(&out[4 * 4], state[4] 
^ state[12]);store32(&out[5 * 4], state[5] ^ state[13]);store32(&out[6 * 4], state[6] ^ state[14]);store32(&out[7 * 4], state[7] ^ state[15]);store32(&out[8 * 4], state[8] ^ cv[0]);store32(&out[9 * 4], state[9] ^ cv[1]);store32(&out[10 * 4], state[10] ^ cv[2]);store32(&out[11 * 4], state[11] ^ cv[3]);store32(&out[12 * 4], state[12] ^ cv[4]);store32(&out[13 * 4], state[13] ^ cv[5]);store32(&out[14 * 4], state[14] ^ cv[6]);store32(&out[15 * 4], state[15] ^ cv[7]);}INLINE void hash_one_portable(const uint8_t *input, size_t blocks,const uint32_t key[8], uint64_t counter,uint8_t flags, uint8_t flags_start,uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) {uint32_t cv[8];memcpy(cv, key, BLAKE3_KEY_LEN);uint8_t block_flags = flags | flags_start;while (blocks > 0) {if (blocks == 1) {block_flags |= flags_end;}blake3_compress_in_place_portable(cv, input, BLAKE3_BLOCK_LEN, counter,block_flags);input = &input[BLAKE3_BLOCK_LEN];blocks -= 1;block_flags = flags;}store_cv_words(out, cv);}void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,size_t blocks, const uint32_t key[8],uint64_t counter, bool increment_counter,uint8_t flags, uint8_t flags_start,uint8_t flags_end, uint8_t *out) {while (num_inputs > 0) {hash_one_portable(inputs[0], blocks, key, counter, flags, flags_start,flags_end, out);if (increment_counter) {counter += 1;}inputs += 1;num_inputs -= 1;out = &out[BLAKE3_OUT_LEN];}}
#ifndef BLAKE3_IMPL_H#define BLAKE3_IMPL_H#include <assert.h>#include <stdbool.h>#include <stddef.h>#include <stdint.h>#include <string.h>#include "blake3.h"// internal flagsenum blake3_flags {CHUNK_START = 1 << 0,CHUNK_END = 1 << 1,PARENT = 1 << 2,ROOT = 1 << 3,KEYED_HASH = 1 << 4,DERIVE_KEY_CONTEXT = 1 << 5,DERIVE_KEY_MATERIAL = 1 << 6,};// This C implementation tries to support recent versions of GCC, Clang, and// MSVC.#if defined(_MSC_VER)#define INLINE static __forceinline#else#define INLINE static inline __attribute__((always_inline))#endif#if defined(__x86_64__) || defined(_M_X64)#define IS_X86#define IS_X86_64#endif#if defined(__i386__) || defined(_M_IX86)#define IS_X86#define IS_X86_32#endif#if defined(__aarch64__) || defined(_M_ARM64)#define IS_AARCH64#endif#if defined(IS_X86)#if defined(_MSC_VER)#include <intrin.h>#endif#include <immintrin.h>#endif#if !defined(BLAKE3_USE_NEON)// If BLAKE3_USE_NEON not manually set, autodetect based on AArch64ness#if defined(IS_AARCH64)#define BLAKE3_USE_NEON 1#else#define BLAKE3_USE_NEON 0#endif#endif#if defined(IS_X86)#define MAX_SIMD_DEGREE 16#elif BLAKE3_USE_NEON == 1#define MAX_SIMD_DEGREE 4#else#define MAX_SIMD_DEGREE 1#endif// There are some places where we want a static size that's equal to the// MAX_SIMD_DEGREE, but also at least 2.#define MAX_SIMD_DEGREE_OR_2 (MAX_SIMD_DEGREE > 2 ? 
MAX_SIMD_DEGREE : 2)static const uint32_t IV[8] = {0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL,0xA54FF53AUL, 0x510E527FUL, 0x9B05688CUL,0x1F83D9ABUL, 0x5BE0CD19UL};static const uint8_t MSG_SCHEDULE[7][16] = {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},{2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8},{3, 4, 10, 12, 13, 2, 7, 14, 6, 5, 9, 0, 11, 15, 8, 1},{10, 7, 12, 9, 14, 3, 13, 15, 4, 0, 11, 2, 5, 8, 1, 6},{12, 13, 9, 11, 15, 10, 14, 8, 7, 2, 5, 3, 0, 1, 6, 4},{9, 14, 11, 5, 8, 12, 15, 1, 13, 3, 0, 10, 2, 6, 4, 7},{11, 15, 5, 0, 1, 9, 8, 6, 14, 10, 2, 12, 3, 4, 7, 13},};/* Find index of the highest set bit *//* x is assumed to be nonzero. */static unsigned int highest_one(uint64_t x) {#if defined(__GNUC__) || defined(__clang__)return 63 ^ __builtin_clzll(x);#elif defined(_MSC_VER) && defined(IS_X86_64)unsigned long index;_BitScanReverse64(&index, x);return index;#elif defined(_MSC_VER) && defined(IS_X86_32)if(x >> 32) {unsigned long index;_BitScanReverse(&index, (unsigned long)(x >> 32));return 32 + index;} else {unsigned long index;_BitScanReverse(&index, (unsigned long)x);return index;}#elseunsigned int c = 0;if(x & 0xffffffff00000000ULL) { x >>= 32; c += 32; }if(x & 0x00000000ffff0000ULL) { x >>= 16; c += 16; }if(x & 0x000000000000ff00ULL) { x >>= 8; c += 8; }if(x & 0x00000000000000f0ULL) { x >>= 4; c += 4; }if(x & 0x000000000000000cULL) { x >>= 2; c += 2; }if(x & 0x0000000000000002ULL) { c += 1; }return c;#endif}// Count the number of 1 bits.INLINE unsigned int popcnt(uint64_t x) {#if defined(__GNUC__) || defined(__clang__)return __builtin_popcountll(x);#elseunsigned int count = 0;while (x != 0) {count += 1;x &= x - 1;}return count;#endif}// Largest power of two less than or equal to x. 
As a special case, returns 1// when x is 0.INLINE uint64_t round_down_to_power_of_2(uint64_t x) {return 1ULL << highest_one(x | 1);}INLINE uint32_t counter_low(uint64_t counter) { return (uint32_t)counter; }INLINE uint32_t counter_high(uint64_t counter) {return (uint32_t)(counter >> 32);}INLINE uint32_t load32(const void *src) {const uint8_t *p = (const uint8_t *)src;return ((uint32_t)(p[0]) << 0) | ((uint32_t)(p[1]) << 8) |((uint32_t)(p[2]) << 16) | ((uint32_t)(p[3]) << 24);}INLINE void load_key_words(const uint8_t key[BLAKE3_KEY_LEN],uint32_t key_words[8]) {key_words[0] = load32(&key[0 * 4]);key_words[1] = load32(&key[1 * 4]);key_words[2] = load32(&key[2 * 4]);key_words[3] = load32(&key[3 * 4]);key_words[4] = load32(&key[4 * 4]);key_words[5] = load32(&key[5 * 4]);key_words[6] = load32(&key[6 * 4]);key_words[7] = load32(&key[7 * 4]);}INLINE void store32(void *dst, uint32_t w) {uint8_t *p = (uint8_t *)dst;p[0] = (uint8_t)(w >> 0);p[1] = (uint8_t)(w >> 8);p[2] = (uint8_t)(w >> 16);p[3] = (uint8_t)(w >> 24);}INLINE void store_cv_words(uint8_t bytes_out[32], uint32_t cv_words[8]) {store32(&bytes_out[0 * 4], cv_words[0]);store32(&bytes_out[1 * 4], cv_words[1]);store32(&bytes_out[2 * 4], cv_words[2]);store32(&bytes_out[3 * 4], cv_words[3]);store32(&bytes_out[4 * 4], cv_words[4]);store32(&bytes_out[5 * 4], cv_words[5]);store32(&bytes_out[6 * 4], cv_words[6]);store32(&bytes_out[7 * 4], cv_words[7]);}void blake3_compress_in_place(uint32_t cv[8],const uint8_t block[BLAKE3_BLOCK_LEN],uint8_t block_len, uint64_t counter,uint8_t flags);void blake3_compress_xof(const uint32_t cv[8],const uint8_t block[BLAKE3_BLOCK_LEN],uint8_t block_len, uint64_t counter, uint8_t flags,uint8_t out[64]);void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,size_t blocks, const uint32_t key[8], uint64_t counter,bool increment_counter, uint8_t flags,uint8_t flags_start, uint8_t flags_end, uint8_t *out);size_t blake3_simd_degree(void);// Declarations for implementation-specific 
functions.void blake3_compress_in_place_portable(uint32_t cv[8],const uint8_t block[BLAKE3_BLOCK_LEN],uint8_t block_len, uint64_t counter,uint8_t flags);void blake3_compress_xof_portable(const uint32_t cv[8],const uint8_t block[BLAKE3_BLOCK_LEN],uint8_t block_len, uint64_t counter,uint8_t flags, uint8_t out[64]);void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,size_t blocks, const uint32_t key[8],uint64_t counter, bool increment_counter,uint8_t flags, uint8_t flags_start,uint8_t flags_end, uint8_t *out);#if defined(IS_X86)#if !defined(BLAKE3_NO_SSE2)void blake3_compress_in_place_sse2(uint32_t cv[8],const uint8_t block[BLAKE3_BLOCK_LEN],uint8_t block_len, uint64_t counter,uint8_t flags);void blake3_compress_xof_sse2(const uint32_t cv[8],const uint8_t block[BLAKE3_BLOCK_LEN],uint8_t block_len, uint64_t counter,uint8_t flags, uint8_t out[64]);void blake3_hash_many_sse2(const uint8_t *const *inputs, size_t num_inputs,size_t blocks, const uint32_t key[8],uint64_t counter, bool increment_counter,uint8_t flags, uint8_t flags_start,uint8_t flags_end, uint8_t *out);#endif#if !defined(BLAKE3_NO_SSE41)void blake3_compress_in_place_sse41(uint32_t cv[8],const uint8_t block[BLAKE3_BLOCK_LEN],uint8_t block_len, uint64_t counter,uint8_t flags);void blake3_compress_xof_sse41(const uint32_t cv[8],const uint8_t block[BLAKE3_BLOCK_LEN],uint8_t block_len, uint64_t counter,uint8_t flags, uint8_t out[64]);void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,size_t blocks, const uint32_t key[8],uint64_t counter, bool increment_counter,uint8_t flags, uint8_t flags_start,uint8_t flags_end, uint8_t *out);#endif#if !defined(BLAKE3_NO_AVX2)void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,size_t blocks, const uint32_t key[8],uint64_t counter, bool increment_counter,uint8_t flags, uint8_t flags_start,uint8_t flags_end, uint8_t *out);#endif#if !defined(BLAKE3_NO_AVX512)void blake3_compress_in_place_avx512(uint32_t 
cv[8],const uint8_t block[BLAKE3_BLOCK_LEN],uint8_t block_len, uint64_t counter,uint8_t flags);void blake3_compress_xof_avx512(const uint32_t cv[8],const uint8_t block[BLAKE3_BLOCK_LEN],uint8_t block_len, uint64_t counter,uint8_t flags, uint8_t out[64]);void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,size_t blocks, const uint32_t key[8],uint64_t counter, bool increment_counter,uint8_t flags, uint8_t flags_start,uint8_t flags_end, uint8_t *out);#endif#endif#if BLAKE3_USE_NEON == 1void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,size_t blocks, const uint32_t key[8],uint64_t counter, bool increment_counter,uint8_t flags, uint8_t flags_start,uint8_t flags_end, uint8_t *out);#endif#endif /* BLAKE3_IMPL_H */
#include <stdbool.h>#include <stddef.h>#include <stdint.h>#include "blake3_impl.h"#if defined(IS_X86)#if defined(_MSC_VER)#include <intrin.h>#elif defined(__GNUC__)#include <immintrin.h>#else#error "Unimplemented!"#endif#endif#define MAYBE_UNUSED(x) (void)((x))#if defined(IS_X86)static uint64_t xgetbv(void) {#if defined(_MSC_VER)return _xgetbv(0);#elseuint32_t eax = 0, edx = 0;__asm__ __volatile__("xgetbv\n" : "=a"(eax), "=d"(edx) : "c"(0));return ((uint64_t)edx << 32) | eax;#endif}static void cpuid(uint32_t out[4], uint32_t id) {#if defined(_MSC_VER)__cpuid((int *)out, id);#elif defined(__i386__) || defined(_M_IX86)__asm__ __volatile__("movl %%ebx, %1\n""cpuid\n""xchgl %1, %%ebx\n": "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3]): "a"(id));#else__asm__ __volatile__("cpuid\n": "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]): "a"(id));#endif}static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) {#if defined(_MSC_VER)__cpuidex((int *)out, id, sid);#elif defined(__i386__) || defined(_M_IX86)__asm__ __volatile__("movl %%ebx, %1\n""cpuid\n""xchgl %1, %%ebx\n": "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3]): "a"(id), "c"(sid));#else__asm__ __volatile__("cpuid\n": "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]): "a"(id), "c"(sid));#endif}#endifenum cpu_feature {SSE2 = 1 << 0,SSSE3 = 1 << 1,SSE41 = 1 << 2,AVX = 1 << 3,AVX2 = 1 << 4,AVX512F = 1 << 5,AVX512VL = 1 << 6,/* ... 
*/UNDEFINED = 1 << 30};#if !defined(BLAKE3_TESTING)static /* Allow the variable to be controlled manually for testing */#endifenum cpu_feature g_cpu_features = UNDEFINED;#if !defined(BLAKE3_TESTING)static#endifenum cpu_featureget_cpu_features(void) {if (g_cpu_features != UNDEFINED) {return g_cpu_features;} else {#if defined(IS_X86)uint32_t regs[4] = {0};uint32_t *eax = ®s[0], *ebx = ®s[1], *ecx = ®s[2], *edx = ®s[3];(void)edx;enum cpu_feature features = 0;cpuid(regs, 0);const int max_id = *eax;cpuid(regs, 1);#if defined(__amd64__) || defined(_M_X64)features |= SSE2;#elseif (*edx & (1UL << 26))features |= SSE2;#endifif (*ecx & (1UL << 0))features |= SSSE3;if (*ecx & (1UL << 19))features |= SSE41;if (*ecx & (1UL << 27)) { // OSXSAVEconst uint64_t mask = xgetbv();if ((mask & 6) == 6) { // SSE and AVX statesif (*ecx & (1UL << 28))features |= AVX;if (max_id >= 7) {cpuidex(regs, 7, 0);if (*ebx & (1UL << 5))features |= AVX2;if ((mask & 224) == 224) { // Opmask, ZMM_Hi256, Hi16_Zmmif (*ebx & (1UL << 31))features |= AVX512VL;if (*ebx & (1UL << 16))features |= AVX512F;}}}}g_cpu_features = features;return features;#else/* How to detect NEON? 
*/return 0;#endif}}void blake3_compress_in_place(uint32_t cv[8],const uint8_t block[BLAKE3_BLOCK_LEN],uint8_t block_len, uint64_t counter,uint8_t flags) {#if defined(IS_X86)const enum cpu_feature features = get_cpu_features();MAYBE_UNUSED(features);#if !defined(BLAKE3_NO_AVX512)if (features & AVX512VL) {blake3_compress_in_place_avx512(cv, block, block_len, counter, flags);return;}#endif#if !defined(BLAKE3_NO_SSE41)if (features & SSE41) {blake3_compress_in_place_sse41(cv, block, block_len, counter, flags);return;}#endif#if !defined(BLAKE3_NO_SSE2)if (features & SSE2) {blake3_compress_in_place_sse2(cv, block, block_len, counter, flags);return;}#endif#endifblake3_compress_in_place_portable(cv, block, block_len, counter, flags);}void blake3_compress_xof(const uint32_t cv[8],const uint8_t block[BLAKE3_BLOCK_LEN],uint8_t block_len, uint64_t counter, uint8_t flags,uint8_t out[64]) {#if defined(IS_X86)const enum cpu_feature features = get_cpu_features();MAYBE_UNUSED(features);#if !defined(BLAKE3_NO_AVX512)if (features & AVX512VL) {blake3_compress_xof_avx512(cv, block, block_len, counter, flags, out);return;}#endif#if !defined(BLAKE3_NO_SSE41)if (features & SSE41) {blake3_compress_xof_sse41(cv, block, block_len, counter, flags, out);return;}#endif#if !defined(BLAKE3_NO_SSE2)if (features & SSE2) {blake3_compress_xof_sse2(cv, block, block_len, counter, flags, out);return;}#endif#endifblake3_compress_xof_portable(cv, block, block_len, counter, flags, out);}void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,size_t blocks, const uint32_t key[8], uint64_t counter,bool increment_counter, uint8_t flags,uint8_t flags_start, uint8_t flags_end, uint8_t *out) {#if defined(IS_X86)const enum cpu_feature features = get_cpu_features();MAYBE_UNUSED(features);#if !defined(BLAKE3_NO_AVX512)if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,increment_counter, flags, flags_start, 
flags_end,out);return;}#endif#if !defined(BLAKE3_NO_AVX2)if (features & AVX2) {blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter,increment_counter, flags, flags_start, flags_end,out);return;}#endif#if !defined(BLAKE3_NO_SSE41)if (features & SSE41) {blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,increment_counter, flags, flags_start, flags_end,out);return;}#endif#if !defined(BLAKE3_NO_SSE2)if (features & SSE2) {blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter,increment_counter, flags, flags_start, flags_end,out);return;}#endif#endif#if BLAKE3_USE_NEON == 1blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,increment_counter, flags, flags_start, flags_end, out);return;#endifblake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,increment_counter, flags, flags_start, flags_end,out);}// The dynamically detected SIMD degree of the current platform.size_t blake3_simd_degree(void) {#if defined(IS_X86)const enum cpu_feature features = get_cpu_features();MAYBE_UNUSED(features);#if !defined(BLAKE3_NO_AVX512)if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {return 16;}#endif#if !defined(BLAKE3_NO_AVX2)if (features & AVX2) {return 8;}#endif#if !defined(BLAKE3_NO_SSE41)if (features & SSE41) {return 4;}#endif#if !defined(BLAKE3_NO_SSE2)if (features & SSE2) {return 4;}#endif#endif#if BLAKE3_USE_NEON == 1return 4;#endifreturn 1;}
// Public interface of the BLAKE3 C implementation.
#ifndef BLAKE3_H
#define BLAKE3_H

#include <stddef.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

#define BLAKE3_VERSION_STRING "1.3.1"
#define BLAKE3_KEY_LEN 32
#define BLAKE3_OUT_LEN 32
#define BLAKE3_BLOCK_LEN 64
#define BLAKE3_CHUNK_LEN 1024
#define BLAKE3_MAX_DEPTH 54

// This struct is a private implementation detail. It has to be here because
// it's part of blake3_hasher below.
typedef struct {
  uint32_t cv[8];
  uint64_t chunk_counter;
  uint8_t buf[BLAKE3_BLOCK_LEN];
  uint8_t buf_len;
  uint8_t blocks_compressed;
  uint8_t flags;
} blake3_chunk_state;

typedef struct {
  uint32_t key[8];
  blake3_chunk_state chunk;
  uint8_t cv_stack_len;
  // The stack size is MAX_DEPTH + 1 because we do lazy merging. For example,
  // with 7 chunks, we have 3 entries in the stack. Adding an 8th chunk
  // requires a 4th entry, rather than merging everything down to 1, because we
  // don't know whether more input is coming. This is different from how the
  // reference implementation does things.
  uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN];
} blake3_hasher;

// Returns BLAKE3_VERSION_STRING.
const char *blake3_version(void);

// Initialize self for hashing with the default key and no mode flags.
void blake3_hasher_init(blake3_hasher *self);

// Initialize self for keyed hashing with the given BLAKE3_KEY_LEN-byte key.
void blake3_hasher_init_keyed(blake3_hasher *self,
                              const uint8_t key[BLAKE3_KEY_LEN]);

// Initialize self for key derivation. context must be a NUL-terminated
// string; its length is taken with strlen().
void blake3_hasher_init_derive_key(blake3_hasher *self, const char *context);

// Same as blake3_hasher_init_derive_key(), but the context is given as
// context_len raw bytes.
void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
                                       size_t context_len);

// NOTE(review): the definitions of the four functions below are not visible
// from this header; the names suggest update absorbs input_len bytes,
// finalize/finalize_seek write out_len output bytes (the latter starting at
// byte offset seek), and reset returns the hasher to its initial state --
// confirm against the implementation.
void blake3_hasher_update(blake3_hasher *self, const void *input,
                          size_t input_len);
void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
                            size_t out_len);
void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
                                 uint8_t *out, size_t out_len);
void blake3_hasher_reset(blake3_hasher *self);

#ifdef __cplusplus
}
#endif

#endif /* BLAKE3_H */
#include <assert.h>#include <stdbool.h>#include <string.h>#include "blake3.h"#include "blake3_impl.h"const char *blake3_version(void) { return BLAKE3_VERSION_STRING; }INLINE void chunk_state_init(blake3_chunk_state *self, const uint32_t key[8],uint8_t flags) {memcpy(self->cv, key, BLAKE3_KEY_LEN);self->chunk_counter = 0;memset(self->buf, 0, BLAKE3_BLOCK_LEN);self->buf_len = 0;self->blocks_compressed = 0;self->flags = flags;}INLINE void chunk_state_reset(blake3_chunk_state *self, const uint32_t key[8],uint64_t chunk_counter) {memcpy(self->cv, key, BLAKE3_KEY_LEN);self->chunk_counter = chunk_counter;self->blocks_compressed = 0;memset(self->buf, 0, BLAKE3_BLOCK_LEN);self->buf_len = 0;}INLINE size_t chunk_state_len(const blake3_chunk_state *self) {return (BLAKE3_BLOCK_LEN * (size_t)self->blocks_compressed) +((size_t)self->buf_len);}INLINE size_t chunk_state_fill_buf(blake3_chunk_state *self,const uint8_t *input, size_t input_len) {size_t take = BLAKE3_BLOCK_LEN - ((size_t)self->buf_len);if (take > input_len) {take = input_len;}uint8_t *dest = self->buf + ((size_t)self->buf_len);memcpy(dest, input, take);self->buf_len += (uint8_t)take;return take;}INLINE uint8_t chunk_state_maybe_start_flag(const blake3_chunk_state *self) {if (self->blocks_compressed == 0) {return CHUNK_START;} else {return 0;}}typedef struct {uint32_t input_cv[8];uint64_t counter;uint8_t block[BLAKE3_BLOCK_LEN];uint8_t block_len;uint8_t flags;} output_t;INLINE output_t make_output(const uint32_t input_cv[8],const uint8_t block[BLAKE3_BLOCK_LEN],uint8_t block_len, uint64_t counter,uint8_t flags) {output_t ret;memcpy(ret.input_cv, input_cv, 32);memcpy(ret.block, block, BLAKE3_BLOCK_LEN);ret.block_len = block_len;ret.counter = counter;ret.flags = flags;return ret;}// Chaining values within a given chunk (specifically the compress_in_place// interface) are represented as words. This avoids unnecessary bytes<->words// conversion overhead in the portable implementation. 
However, the hash_many// interface handles both user input and parent node blocks, so it accepts// bytes. For that reason, chaining values in the CV stack are represented as// bytes.INLINE void output_chaining_value(const output_t *self, uint8_t cv[32]) {uint32_t cv_words[8];memcpy(cv_words, self->input_cv, 32);blake3_compress_in_place(cv_words, self->block, self->block_len,self->counter, self->flags);store_cv_words(cv, cv_words);}INLINE void output_root_bytes(const output_t *self, uint64_t seek, uint8_t *out,size_t out_len) {uint64_t output_block_counter = seek / 64;size_t offset_within_block = seek % 64;uint8_t wide_buf[64];while (out_len > 0) {blake3_compress_xof(self->input_cv, self->block, self->block_len,output_block_counter, self->flags | ROOT, wide_buf);size_t available_bytes = 64 - offset_within_block;size_t memcpy_len;if (out_len > available_bytes) {memcpy_len = available_bytes;} else {memcpy_len = out_len;}memcpy(out, wide_buf + offset_within_block, memcpy_len);out += memcpy_len;out_len -= memcpy_len;output_block_counter += 1;offset_within_block = 0;}}INLINE void chunk_state_update(blake3_chunk_state *self, const uint8_t *input,size_t input_len) {if (self->buf_len > 0) {size_t take = chunk_state_fill_buf(self, input, input_len);input += take;input_len -= take;if (input_len > 0) {blake3_compress_in_place(self->cv, self->buf, BLAKE3_BLOCK_LEN, self->chunk_counter,self->flags | chunk_state_maybe_start_flag(self));self->blocks_compressed += 1;self->buf_len = 0;memset(self->buf, 0, BLAKE3_BLOCK_LEN);}}while (input_len > BLAKE3_BLOCK_LEN) {blake3_compress_in_place(self->cv, input, BLAKE3_BLOCK_LEN,self->chunk_counter,self->flags | chunk_state_maybe_start_flag(self));self->blocks_compressed += 1;input += BLAKE3_BLOCK_LEN;input_len -= BLAKE3_BLOCK_LEN;}size_t take = chunk_state_fill_buf(self, input, input_len);input += take;input_len -= take;}INLINE output_t chunk_state_output(const blake3_chunk_state *self) {uint8_t block_flags =self->flags | 
chunk_state_maybe_start_flag(self) | CHUNK_END;return make_output(self->cv, self->buf, self->buf_len, self->chunk_counter,block_flags);}INLINE output_t parent_output(const uint8_t block[BLAKE3_BLOCK_LEN],const uint32_t key[8], uint8_t flags) {return make_output(key, block, BLAKE3_BLOCK_LEN, 0, flags | PARENT);}// Given some input larger than one chunk, return the number of bytes that// should go in the left subtree. This is the largest power-of-2 number of// chunks that leaves at least 1 byte for the right subtree.INLINE size_t left_len(size_t content_len) {// Subtract 1 to reserve at least one byte for the right side. content_len// should always be greater than BLAKE3_CHUNK_LEN.size_t full_chunks = (content_len - 1) / BLAKE3_CHUNK_LEN;return round_down_to_power_of_2(full_chunks) * BLAKE3_CHUNK_LEN;}// Use SIMD parallelism to hash up to MAX_SIMD_DEGREE chunks at the same time// on a single thread. Write out the chunk chaining values and return the// number of chunks hashed. These chunks are never the root and never empty;// those cases use a different codepath.INLINE size_t compress_chunks_parallel(const uint8_t *input, size_t input_len,const uint32_t key[8],uint64_t chunk_counter, uint8_t flags,uint8_t *out) {#if defined(BLAKE3_TESTING)assert(0 < input_len);assert(input_len <= MAX_SIMD_DEGREE * BLAKE3_CHUNK_LEN);#endifconst uint8_t *chunks_array[MAX_SIMD_DEGREE];size_t input_position = 0;size_t chunks_array_len = 0;while (input_len - input_position >= BLAKE3_CHUNK_LEN) {chunks_array[chunks_array_len] = &input[input_position];input_position += BLAKE3_CHUNK_LEN;chunks_array_len += 1;}blake3_hash_many(chunks_array, chunks_array_len,BLAKE3_CHUNK_LEN / BLAKE3_BLOCK_LEN, key, chunk_counter,true, flags, CHUNK_START, CHUNK_END, out);// Hash the remaining partial chunk, if there is one. 
Note that the empty// chunk (meaning the empty message) is a different codepath.if (input_len > input_position) {uint64_t counter = chunk_counter + (uint64_t)chunks_array_len;blake3_chunk_state chunk_state;chunk_state_init(&chunk_state, key, flags);chunk_state.chunk_counter = counter;chunk_state_update(&chunk_state, &input[input_position],input_len - input_position);output_t output = chunk_state_output(&chunk_state);output_chaining_value(&output, &out[chunks_array_len * BLAKE3_OUT_LEN]);return chunks_array_len + 1;} else {return chunks_array_len;}}// Use SIMD parallelism to hash up to MAX_SIMD_DEGREE parents at the same time// on a single thread. Write out the parent chaining values and return the// number of parents hashed. (If there's an odd input chaining value left over,// return it as an additional output.) These parents are never the root and// never empty; those cases use a different codepath.INLINE size_t compress_parents_parallel(const uint8_t *child_chaining_values,size_t num_chaining_values,const uint32_t key[8], uint8_t flags,uint8_t *out) {#if defined(BLAKE3_TESTING)assert(2 <= num_chaining_values);assert(num_chaining_values <= 2 * MAX_SIMD_DEGREE_OR_2);#endifconst uint8_t *parents_array[MAX_SIMD_DEGREE_OR_2];size_t parents_array_len = 0;while (num_chaining_values - (2 * parents_array_len) >= 2) {parents_array[parents_array_len] =&child_chaining_values[2 * parents_array_len * BLAKE3_OUT_LEN];parents_array_len += 1;}blake3_hash_many(parents_array, parents_array_len, 1, key,0, // Parents always use counter 0.false, flags | PARENT,0, // Parents have no start flags.0, // Parents have no end flags.out);// If there's an odd child left over, it becomes an output.if (num_chaining_values > 2 * parents_array_len) {memcpy(&out[parents_array_len * BLAKE3_OUT_LEN],&child_chaining_values[2 * parents_array_len * BLAKE3_OUT_LEN],BLAKE3_OUT_LEN);return parents_array_len + 1;} else {return parents_array_len;}}// The wide helper function returns (writes out) an array of 
chaining values// and returns the length of that array. The number of chaining values returned// is the dynamically detected SIMD degree, at most MAX_SIMD_DEGREE. Or fewer,// if the input is shorter than that many chunks. The reason for maintaining a// wide array of chaining values going back up the tree, is to allow the// implementation to hash as many parents in parallel as possible.//// As a special case when the SIMD degree is 1, this function will still return// at least 2 outputs. This guarantees that this function doesn't perform the// root compression. (If it did, it would use the wrong flags, and also we// wouldn't be able to implement exendable output.) Note that this function is// not used when the whole input is only 1 chunk long; that's a different// codepath.//// Why not just have the caller split the input on the first update(), instead// of implementing this special rule? Because we don't want to limit SIMD or// multi-threading parallelism for that update().static size_t blake3_compress_subtree_wide(const uint8_t *input,size_t input_len,const uint32_t key[8],uint64_t chunk_counter,uint8_t flags, uint8_t *out) {// Note that the single chunk case does *not* bump the SIMD degree up to 2// when it is 1. If this implementation adds multi-threading in the future,// this gives us the option of multi-threading even the 2-chunk case, which// can help performance on smaller platforms.if (input_len <= blake3_simd_degree() * BLAKE3_CHUNK_LEN) {return compress_chunks_parallel(input, input_len, key, chunk_counter, flags,out);}// With more than simd_degree chunks, we need to recurse. Start by dividing// the input into left and right subtrees. (Note that this is only optimal// as long as the SIMD degree is a power of 2. 
If we ever get a SIMD degree// of 3 or something, we'll need a more complicated strategy.)size_t left_input_len = left_len(input_len);size_t right_input_len = input_len - left_input_len;const uint8_t *right_input = &input[left_input_len];uint64_t right_chunk_counter =chunk_counter + (uint64_t)(left_input_len / BLAKE3_CHUNK_LEN);// Make space for the child outputs. Here we use MAX_SIMD_DEGREE_OR_2 to// account for the special case of returning 2 outputs when the SIMD degree// is 1.uint8_t cv_array[2 * MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN];size_t degree = blake3_simd_degree();if (left_input_len > BLAKE3_CHUNK_LEN && degree == 1) {// The special case: We always use a degree of at least two, to make// sure there are two outputs. Except, as noted above, at the chunk// level, where we allow degree=1. (Note that the 1-chunk-input case is// a different codepath.)degree = 2;}uint8_t *right_cvs = &cv_array[degree * BLAKE3_OUT_LEN];// Recurse! If this implementation adds multi-threading support in the// future, this is where it will go.size_t left_n = blake3_compress_subtree_wide(input, left_input_len, key,chunk_counter, flags, cv_array);size_t right_n = blake3_compress_subtree_wide(right_input, right_input_len, key, right_chunk_counter, flags, right_cvs);// The special case again. If simd_degree=1, then we'll have left_n=1 and// right_n=1. Rather than compressing them into a single output, return// them directly, to make sure we always have at least two outputs.if (left_n == 1) {memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN);return 2;}// Otherwise, do one layer of parent node compression.size_t num_chaining_values = left_n + right_n;return compress_parents_parallel(cv_array, num_chaining_values, key, flags,out);}// Hash a subtree with compress_subtree_wide(), and then condense the resulting// list of chaining values down to a single parent node. Don't compress that// last parent node, however. 
Instead, return its message bytes (the// concatenated chaining values of its children). This is necessary when the// first call to update() supplies a complete subtree, because the topmost// parent node of that subtree could end up being the root. It's also necessary// for extended output in the general case.//// As with compress_subtree_wide(), this function is not used on inputs of 1// chunk or less. That's a different codepath.INLINE void compress_subtree_to_parent_node(const uint8_t *input, size_t input_len, const uint32_t key[8],uint64_t chunk_counter, uint8_t flags, uint8_t out[2 * BLAKE3_OUT_LEN]) {#if defined(BLAKE3_TESTING)assert(input_len > BLAKE3_CHUNK_LEN);#endifuint8_t cv_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN];size_t num_cvs = blake3_compress_subtree_wide(input, input_len, key,chunk_counter, flags, cv_array);assert(num_cvs <= MAX_SIMD_DEGREE_OR_2);// If MAX_SIMD_DEGREE is greater than 2 and there's enough input,// compress_subtree_wide() returns more than 2 chaining values. Condense// them into 2 by forming parent nodes repeatedly.uint8_t out_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN / 2];// The second half of this loop condition is always true, and we just// asserted it above. But GCC can't tell that it's always true, and if NDEBUG// is set on platforms where MAX_SIMD_DEGREE_OR_2 == 2, GCC emits spurious// warnings here. 
GCC 8.5 is particularly sensitive, so if you're changing// this code, test it against that version.while (num_cvs > 2 && num_cvs <= MAX_SIMD_DEGREE_OR_2) {num_cvs =compress_parents_parallel(cv_array, num_cvs, key, flags, out_array);memcpy(cv_array, out_array, num_cvs * BLAKE3_OUT_LEN);}memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN);}INLINE void hasher_init_base(blake3_hasher *self, const uint32_t key[8],uint8_t flags) {memcpy(self->key, key, BLAKE3_KEY_LEN);chunk_state_init(&self->chunk, key, flags);self->cv_stack_len = 0;}void blake3_hasher_init(blake3_hasher *self) { hasher_init_base(self, IV, 0); }void blake3_hasher_init_keyed(blake3_hasher *self,const uint8_t key[BLAKE3_KEY_LEN]) {uint32_t key_words[8];load_key_words(key, key_words);hasher_init_base(self, key_words, KEYED_HASH);}void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,size_t context_len) {blake3_hasher context_hasher;hasher_init_base(&context_hasher, IV, DERIVE_KEY_CONTEXT);blake3_hasher_update(&context_hasher, context, context_len);uint8_t context_key[BLAKE3_KEY_LEN];blake3_hasher_finalize(&context_hasher, context_key, BLAKE3_KEY_LEN);uint32_t context_key_words[8];load_key_words(context_key, context_key_words);hasher_init_base(self, context_key_words, DERIVE_KEY_MATERIAL);}void blake3_hasher_init_derive_key(blake3_hasher *self, const char *context) {blake3_hasher_init_derive_key_raw(self, context, strlen(context));}// As described in hasher_push_cv() below, we do "lazy merging", delaying// merges until right before the next CV is about to be added. This is// different from the reference implementation. Another difference is that we// aren't always merging 1 chunk at a time. Instead, each CV might represent// any power-of-two number of chunks, as long as the smaller-above-larger stack// order is maintained. 
Instead of the "count the trailing 0-bits" algorithm// described in the spec, we use a "count the total number of 1-bits" variant// that doesn't require us to retain the subtree size of the CV on top of the// stack. The principle is the same: each CV that should remain in the stack is// represented by a 1-bit in the total number of chunks (or bytes) so far.INLINE void hasher_merge_cv_stack(blake3_hasher *self, uint64_t total_len) {size_t post_merge_stack_len = (size_t)popcnt(total_len);while (self->cv_stack_len > post_merge_stack_len) {uint8_t *parent_node =&self->cv_stack[(self->cv_stack_len - 2) * BLAKE3_OUT_LEN];output_t output = parent_output(parent_node, self->key, self->chunk.flags);output_chaining_value(&output, parent_node);self->cv_stack_len -= 1;}}// In reference_impl.rs, we merge the new CV with existing CVs from the stack// before pushing it. We can do that because we know more input is coming, so// we know none of the merges are root.//// This setting is different. We want to feed as much input as possible to// compress_subtree_wide(), without setting aside anything for the chunk_state.// If the user gives us 64 KiB, we want to parallelize over all 64 KiB at once// as a single subtree, if at all possible.//// This leads to two problems:// 1) This 64 KiB input might be the only call that ever gets made to update.// In this case, the root node of the 64 KiB subtree would be the root node// of the whole tree, and it would need to be ROOT finalized. We can't// compress it until we know.// 2) This 64 KiB input might complete a larger tree, whose root node is// similarly going to be the the root of the whole tree. For example, maybe// we have 196 KiB (that is, 128 + 64) hashed so far. We can't compress the// node at the root of the 256 KiB subtree until we know how to finalize it.//// The second problem is solved with "lazy merging". That is, when we're about// to add a CV to the stack, we don't merge it with anything first, as the// reference impl does. 
Instead we do merges using the *previous* CV that was// added, which is sitting on top of the stack, and we put the new CV// (unmerged) on top of the stack afterwards. This guarantees that we never// merge the root node until finalize().//// Solving the first problem requires an additional tool,// compress_subtree_to_parent_node(). That function always returns the top// *two* chaining values of the subtree it's compressing. We then do lazy// merging with each of them separately, so that the second CV will always// remain unmerged. (That also helps us support extendable output when we're// hashing an input all-at-once.)INLINE void hasher_push_cv(blake3_hasher *self, uint8_t new_cv[BLAKE3_OUT_LEN],uint64_t chunk_counter) {hasher_merge_cv_stack(self, chunk_counter);memcpy(&self->cv_stack[self->cv_stack_len * BLAKE3_OUT_LEN], new_cv,BLAKE3_OUT_LEN);self->cv_stack_len += 1;}void blake3_hasher_update(blake3_hasher *self, const void *input,size_t input_len) {// Explicitly checking for zero avoids causing UB by passing a null pointer// to memcpy. This comes up in practice with things like:// std::vector<uint8_t> v;// blake3_hasher_update(&hasher, v.data(), v.size());if (input_len == 0) {return;}const uint8_t *input_bytes = (const uint8_t *)input;// If we have some partial chunk bytes in the internal chunk_state, we need// to finish that chunk first.if (chunk_state_len(&self->chunk) > 0) {size_t take = BLAKE3_CHUNK_LEN - chunk_state_len(&self->chunk);if (take > input_len) {take = input_len;}chunk_state_update(&self->chunk, input_bytes, take);input_bytes += take;input_len -= take;// If we've filled the current chunk and there's more coming, finalize this// chunk and proceed. 
In this case we know it's not the root.if (input_len > 0) {output_t output = chunk_state_output(&self->chunk);uint8_t chunk_cv[32];output_chaining_value(&output, chunk_cv);hasher_push_cv(self, chunk_cv, self->chunk.chunk_counter);chunk_state_reset(&self->chunk, self->key, self->chunk.chunk_counter + 1);} else {return;}}// Now the chunk_state is clear, and we have more input. If there's more than// a single chunk (so, definitely not the root chunk), hash the largest whole// subtree we can, with the full benefits of SIMD (and maybe in the future,// multi-threading) parallelism. Two restrictions:// - The subtree has to be a power-of-2 number of chunks. Only subtrees along// the right edge can be incomplete, and we don't know where the right edge// is going to be until we get to finalize().// - The subtree must evenly divide the total number of chunks up until this// point (if total is not 0). If the current incomplete subtree is only// waiting for 1 more chunk, we can't hash a subtree of 4 chunks. We have// to complete the current subtree first.// Because we might need to break up the input to form powers of 2, or to// evenly divide what we already have, this part runs in a loop.while (input_len > BLAKE3_CHUNK_LEN) {size_t subtree_len = round_down_to_power_of_2(input_len);uint64_t count_so_far = self->chunk.chunk_counter * BLAKE3_CHUNK_LEN;// Shrink the subtree_len until it evenly divides the count so far. We know// that subtree_len itself is a power of 2, so we can use a bitmasking// trick instead of an actual remainder operation. (Note that if the caller// consistently passes power-of-2 inputs of the same size, as is hopefully// typical, this loop condition will always fail, and subtree_len will// always be the full length of the input.)//// An aside: We don't have to shrink subtree_len quite this much. For// example, if count_so_far is 1, we could pass 2 chunks to// compress_subtree_to_parent_node. 
Since we'll get 2 CVs back, we'll still// get the right answer in the end, and we might get to use 2-way SIMD// parallelism. The problem with this optimization, is that it gets us// stuck always hashing 2 chunks. The total number of chunks will remain// odd, and we'll never graduate to higher degrees of parallelism. See// https://github.com/BLAKE3-team/BLAKE3/issues/69.while ((((uint64_t)(subtree_len - 1)) & count_so_far) != 0) {subtree_len /= 2;}// The shrunken subtree_len might now be 1 chunk long. If so, hash that one// chunk by itself. Otherwise, compress the subtree into a pair of CVs.uint64_t subtree_chunks = subtree_len / BLAKE3_CHUNK_LEN;if (subtree_len <= BLAKE3_CHUNK_LEN) {blake3_chunk_state chunk_state;chunk_state_init(&chunk_state, self->key, self->chunk.flags);chunk_state.chunk_counter = self->chunk.chunk_counter;chunk_state_update(&chunk_state, input_bytes, subtree_len);output_t output = chunk_state_output(&chunk_state);uint8_t cv[BLAKE3_OUT_LEN];output_chaining_value(&output, cv);hasher_push_cv(self, cv, chunk_state.chunk_counter);} else {// This is the high-performance happy path, though getting here depends// on the caller giving us a long enough input.uint8_t cv_pair[2 * BLAKE3_OUT_LEN];compress_subtree_to_parent_node(input_bytes, subtree_len, self->key,self->chunk.chunk_counter,self->chunk.flags, cv_pair);hasher_push_cv(self, cv_pair, self->chunk.chunk_counter);hasher_push_cv(self, &cv_pair[BLAKE3_OUT_LEN],self->chunk.chunk_counter + (subtree_chunks / 2));}self->chunk.chunk_counter += subtree_chunks;input_bytes += subtree_len;input_len -= subtree_len;}// If there's any remaining input less than a full chunk, add it to the chunk// state. In that case, also do a final merge loop to make sure the subtree// stack doesn't contain any unmerged pairs. The remaining input means we// know these merges are non-root. 
This merge loop isn't strictly necessary// here, because hasher_push_chunk_cv already does its own merge loop, but it// simplifies blake3_hasher_finalize below.if (input_len > 0) {chunk_state_update(&self->chunk, input_bytes, input_len);hasher_merge_cv_stack(self, self->chunk.chunk_counter);}}void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,size_t out_len) {blake3_hasher_finalize_seek(self, 0, out, out_len);}void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,uint8_t *out, size_t out_len) {// Explicitly checking for zero avoids causing UB by passing a null pointer// to memcpy. This comes up in practice with things like:// std::vector<uint8_t> v;// blake3_hasher_finalize(&hasher, v.data(), v.size());if (out_len == 0) {return;}// If the subtree stack is empty, then the current chunk is the root.if (self->cv_stack_len == 0) {output_t output = chunk_state_output(&self->chunk);output_root_bytes(&output, seek, out, out_len);return;}// If there are any bytes in the chunk state, finalize that chunk and do a// roll-up merge between that chunk hash and every subtree in the stack. In// this case, the extra merge loop at the end of blake3_hasher_update// guarantees that none of the subtrees in the stack need to be merged with// each other first. 
Otherwise, if there are no bytes in the chunk state,// then the top of the stack is a chunk hash, and we start the merge from// that.output_t output;size_t cvs_remaining;if (chunk_state_len(&self->chunk) > 0) {cvs_remaining = self->cv_stack_len;output = chunk_state_output(&self->chunk);} else {// There are always at least 2 CVs in the stack in this case.cvs_remaining = self->cv_stack_len - 2;output = parent_output(&self->cv_stack[cvs_remaining * 32], self->key,self->chunk.flags);}while (cvs_remaining > 0) {cvs_remaining -= 1;uint8_t parent_block[BLAKE3_BLOCK_LEN];memcpy(parent_block, &self->cv_stack[cvs_remaining * 32], 32);output_chaining_value(&output, &parent_block[32]);output = parent_output(parent_block, self->key, self->chunk.flags);}output_root_bytes(&output, seek, out, out_len);}void blake3_hasher_reset(blake3_hasher *self) {chunk_state_reset(&self->chunk, self->key, 0);self->cv_stack_len = 0;}
#if defined(__ELF__) && defined(__linux__).section .note.GNU-stack,"",%progbits#endif#if defined(__ELF__) && defined(__CET__) && defined(__has_include)#if __has_include(<cet.h>)#include <cet.h>#endif#endif#if !defined(_CET_ENDBR)#define _CET_ENDBR#endif.intel_syntax noprefix.global blake3_hash_many_sse41.global _blake3_hash_many_sse41.global blake3_compress_in_place_sse41.global _blake3_compress_in_place_sse41.global blake3_compress_xof_sse41.global _blake3_compress_xof_sse41#ifdef __APPLE__.text#else.section .text#endif.p2align 6_blake3_hash_many_sse41:blake3_hash_many_sse41:_CET_ENDBRpush r15push r14push r13push r12push rbxpush rbpmov rbp, rspsub rsp, 360and rsp, 0xFFFFFFFFFFFFFFC0neg r9dmovd xmm0, r9dpshufd xmm0, xmm0, 0x00movdqa xmmword ptr [rsp+0x130], xmm0movdqa xmm1, xmm0pand xmm1, xmmword ptr [ADD0+rip]pand xmm0, xmmword ptr [ADD1+rip]movdqa xmmword ptr [rsp+0x150], xmm0movd xmm0, r8dpshufd xmm0, xmm0, 0x00paddd xmm0, xmm1movdqa xmmword ptr [rsp+0x110], xmm0pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip]pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip]pcmpgtd xmm1, xmm0shr r8, 32movd xmm2, r8dpshufd xmm2, xmm2, 0x00psubd xmm2, xmm1movdqa xmmword ptr [rsp+0x120], xmm2mov rbx, qword ptr [rbp+0x50]mov r15, rdxshl r15, 6movzx r13d, byte ptr [rbp+0x38]movzx r12d, byte ptr [rbp+0x48]cmp rsi, 4jc 3f2:movdqu xmm3, xmmword ptr [rcx]pshufd xmm0, xmm3, 0x00pshufd xmm1, xmm3, 0x55pshufd xmm2, xmm3, 0xAApshufd xmm3, xmm3, 0xFFmovdqu xmm7, xmmword ptr [rcx+0x10]pshufd xmm4, xmm7, 0x00pshufd xmm5, xmm7, 0x55pshufd xmm6, xmm7, 0xAApshufd xmm7, xmm7, 0xFFmov r8, qword ptr [rdi]mov r9, qword ptr [rdi+0x8]mov r10, qword ptr [rdi+0x10]mov r11, qword ptr [rdi+0x18]movzx eax, byte ptr [rbp+0x40]or eax, r13dxor edx, edx9:mov r14d, eaxor eax, r12dadd rdx, 64cmp rdx, r15cmovne eax, r14dmovdqu xmm8, xmmword ptr [r8+rdx-0x40]movdqu xmm9, xmmword ptr [r9+rdx-0x40]movdqu xmm10, xmmword ptr [r10+rdx-0x40]movdqu xmm11, xmmword ptr [r11+rdx-0x40]movdqa xmm12, xmm8punpckldq xmm8, xmm9punpckhdq xmm12, 
xmm9movdqa xmm14, xmm10punpckldq xmm10, xmm11punpckhdq xmm14, xmm11movdqa xmm9, xmm8punpcklqdq xmm8, xmm10punpckhqdq xmm9, xmm10movdqa xmm13, xmm12punpcklqdq xmm12, xmm14punpckhqdq xmm13, xmm14movdqa xmmword ptr [rsp], xmm8movdqa xmmword ptr [rsp+0x10], xmm9movdqa xmmword ptr [rsp+0x20], xmm12movdqa xmmword ptr [rsp+0x30], xmm13movdqu xmm8, xmmword ptr [r8+rdx-0x30]movdqu xmm9, xmmword ptr [r9+rdx-0x30]movdqu xmm10, xmmword ptr [r10+rdx-0x30]movdqu xmm11, xmmword ptr [r11+rdx-0x30]movdqa xmm12, xmm8punpckldq xmm8, xmm9punpckhdq xmm12, xmm9movdqa xmm14, xmm10punpckldq xmm10, xmm11punpckhdq xmm14, xmm11movdqa xmm9, xmm8punpcklqdq xmm8, xmm10punpckhqdq xmm9, xmm10movdqa xmm13, xmm12punpcklqdq xmm12, xmm14punpckhqdq xmm13, xmm14movdqa xmmword ptr [rsp+0x40], xmm8movdqa xmmword ptr [rsp+0x50], xmm9movdqa xmmword ptr [rsp+0x60], xmm12movdqa xmmword ptr [rsp+0x70], xmm13movdqu xmm8, xmmword ptr [r8+rdx-0x20]movdqu xmm9, xmmword ptr [r9+rdx-0x20]movdqu xmm10, xmmword ptr [r10+rdx-0x20]movdqu xmm11, xmmword ptr [r11+rdx-0x20]movdqa xmm12, xmm8punpckldq xmm8, xmm9punpckhdq xmm12, xmm9movdqa xmm14, xmm10punpckldq xmm10, xmm11punpckhdq xmm14, xmm11movdqa xmm9, xmm8punpcklqdq xmm8, xmm10punpckhqdq xmm9, xmm10movdqa xmm13, xmm12punpcklqdq xmm12, xmm14punpckhqdq xmm13, xmm14movdqa xmmword ptr [rsp+0x80], xmm8movdqa xmmword ptr [rsp+0x90], xmm9movdqa xmmword ptr [rsp+0xA0], xmm12movdqa xmmword ptr [rsp+0xB0], xmm13movdqu xmm8, xmmword ptr [r8+rdx-0x10]movdqu xmm9, xmmword ptr [r9+rdx-0x10]movdqu xmm10, xmmword ptr [r10+rdx-0x10]movdqu xmm11, xmmword ptr [r11+rdx-0x10]movdqa xmm12, xmm8punpckldq xmm8, xmm9punpckhdq xmm12, xmm9movdqa xmm14, xmm10punpckldq xmm10, xmm11punpckhdq xmm14, xmm11movdqa xmm9, xmm8punpcklqdq xmm8, xmm10punpckhqdq xmm9, xmm10movdqa xmm13, xmm12punpcklqdq xmm12, xmm14punpckhqdq xmm13, xmm14movdqa xmmword ptr [rsp+0xC0], xmm8movdqa xmmword ptr [rsp+0xD0], xmm9movdqa xmmword ptr [rsp+0xE0], xmm12movdqa xmmword ptr [rsp+0xF0], xmm13movdqa xmm9, xmmword ptr 
[BLAKE3_IV_1+rip]movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip]movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip]movdqa xmm12, xmmword ptr [rsp+0x110]movdqa xmm13, xmmword ptr [rsp+0x120]movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip]movd xmm15, eaxpshufd xmm15, xmm15, 0x00prefetcht0 [r8+rdx+0x80]prefetcht0 [r9+rdx+0x80]prefetcht0 [r10+rdx+0x80]prefetcht0 [r11+rdx+0x80]paddd xmm0, xmmword ptr [rsp]paddd xmm1, xmmword ptr [rsp+0x20]paddd xmm2, xmmword ptr [rsp+0x40]paddd xmm3, xmmword ptr [rsp+0x60]paddd xmm0, xmm4paddd xmm1, xmm5paddd xmm2, xmm6paddd xmm3, xmm7pxor xmm12, xmm0pxor xmm13, xmm1pxor xmm14, xmm2pxor xmm15, xmm3movdqa xmm8, xmmword ptr [ROT16+rip]pshufb xmm12, xmm8pshufb xmm13, xmm8pshufb xmm14, xmm8pshufb xmm15, xmm8movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip]paddd xmm8, xmm12paddd xmm9, xmm13paddd xmm10, xmm14paddd xmm11, xmm15pxor xmm4, xmm8pxor xmm5, xmm9pxor xmm6, xmm10pxor xmm7, xmm11movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm4psrld xmm8, 12pslld xmm4, 20por xmm4, xmm8movdqa xmm8, xmm5psrld xmm8, 12pslld xmm5, 20por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 12pslld xmm6, 20por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 12pslld xmm7, 20por xmm7, xmm8paddd xmm0, xmmword ptr [rsp+0x10]paddd xmm1, xmmword ptr [rsp+0x30]paddd xmm2, xmmword ptr [rsp+0x50]paddd xmm3, xmmword ptr [rsp+0x70]paddd xmm0, xmm4paddd xmm1, xmm5paddd xmm2, xmm6paddd xmm3, xmm7pxor xmm12, xmm0pxor xmm13, xmm1pxor xmm14, xmm2pxor xmm15, xmm3movdqa xmm8, xmmword ptr [ROT8+rip]pshufb xmm12, xmm8pshufb xmm13, xmm8pshufb xmm14, xmm8pshufb xmm15, xmm8movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm12paddd xmm9, xmm13paddd xmm10, xmm14paddd xmm11, xmm15pxor xmm4, xmm8pxor xmm5, xmm9pxor xmm6, xmm10pxor xmm7, xmm11movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm4psrld xmm8, 7pslld xmm4, 25por xmm4, xmm8movdqa xmm8, xmm5psrld xmm8, 7pslld xmm5, 25por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 7pslld xmm6, 25por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 7pslld xmm7, 25por xmm7, xmm8paddd xmm0, 
xmmword ptr [rsp+0x80]paddd xmm1, xmmword ptr [rsp+0xA0]paddd xmm2, xmmword ptr [rsp+0xC0]paddd xmm3, xmmword ptr [rsp+0xE0]paddd xmm0, xmm5paddd xmm1, xmm6paddd xmm2, xmm7paddd xmm3, xmm4pxor xmm15, xmm0pxor xmm12, xmm1pxor xmm13, xmm2pxor xmm14, xmm3movdqa xmm8, xmmword ptr [ROT16+rip]pshufb xmm15, xmm8pshufb xmm12, xmm8pshufb xmm13, xmm8pshufb xmm14, xmm8paddd xmm10, xmm15paddd xmm11, xmm12movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm13paddd xmm9, xmm14pxor xmm5, xmm10pxor xmm6, xmm11pxor xmm7, xmm8pxor xmm4, xmm9movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm5psrld xmm8, 12pslld xmm5, 20por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 12pslld xmm6, 20por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 12pslld xmm7, 20por xmm7, xmm8movdqa xmm8, xmm4psrld xmm8, 12pslld xmm4, 20por xmm4, xmm8paddd xmm0, xmmword ptr [rsp+0x90]paddd xmm1, xmmword ptr [rsp+0xB0]paddd xmm2, xmmword ptr [rsp+0xD0]paddd xmm3, xmmword ptr [rsp+0xF0]paddd xmm0, xmm5paddd xmm1, xmm6paddd xmm2, xmm7paddd xmm3, xmm4pxor xmm15, xmm0pxor xmm12, xmm1pxor xmm13, xmm2pxor xmm14, xmm3movdqa xmm8, xmmword ptr [ROT8+rip]pshufb xmm15, xmm8pshufb xmm12, xmm8pshufb xmm13, xmm8pshufb xmm14, xmm8paddd xmm10, xmm15paddd xmm11, xmm12movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm13paddd xmm9, xmm14pxor xmm5, xmm10pxor xmm6, xmm11pxor xmm7, xmm8pxor xmm4, xmm9movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm5psrld xmm8, 7pslld xmm5, 25por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 7pslld xmm6, 25por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 7pslld xmm7, 25por xmm7, xmm8movdqa xmm8, xmm4psrld xmm8, 7pslld xmm4, 25por xmm4, xmm8paddd xmm0, xmmword ptr [rsp+0x20]paddd xmm1, xmmword ptr [rsp+0x30]paddd xmm2, xmmword ptr [rsp+0x70]paddd xmm3, xmmword ptr [rsp+0x40]paddd xmm0, xmm4paddd xmm1, xmm5paddd xmm2, xmm6paddd xmm3, xmm7pxor xmm12, xmm0pxor xmm13, xmm1pxor xmm14, xmm2pxor xmm15, xmm3movdqa xmm8, xmmword ptr [ROT16+rip]pshufb xmm12, xmm8pshufb xmm13, xmm8pshufb xmm14, xmm8pshufb xmm15, xmm8movdqa xmm8, xmmword 
ptr [rsp+0x100]paddd xmm8, xmm12paddd xmm9, xmm13paddd xmm10, xmm14paddd xmm11, xmm15pxor xmm4, xmm8pxor xmm5, xmm9pxor xmm6, xmm10pxor xmm7, xmm11movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm4psrld xmm8, 12pslld xmm4, 20por xmm4, xmm8movdqa xmm8, xmm5psrld xmm8, 12pslld xmm5, 20por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 12pslld xmm6, 20por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 12pslld xmm7, 20por xmm7, xmm8paddd xmm0, xmmword ptr [rsp+0x60]paddd xmm1, xmmword ptr [rsp+0xA0]paddd xmm2, xmmword ptr [rsp]paddd xmm3, xmmword ptr [rsp+0xD0]paddd xmm0, xmm4paddd xmm1, xmm5paddd xmm2, xmm6paddd xmm3, xmm7pxor xmm12, xmm0pxor xmm13, xmm1pxor xmm14, xmm2pxor xmm15, xmm3movdqa xmm8, xmmword ptr [ROT8+rip]pshufb xmm12, xmm8pshufb xmm13, xmm8pshufb xmm14, xmm8pshufb xmm15, xmm8movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm12paddd xmm9, xmm13paddd xmm10, xmm14paddd xmm11, xmm15pxor xmm4, xmm8pxor xmm5, xmm9pxor xmm6, xmm10pxor xmm7, xmm11movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm4psrld xmm8, 7pslld xmm4, 25por xmm4, xmm8movdqa xmm8, xmm5psrld xmm8, 7pslld xmm5, 25por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 7pslld xmm6, 25por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 7pslld xmm7, 25por xmm7, xmm8paddd xmm0, xmmword ptr [rsp+0x10]paddd xmm1, xmmword ptr [rsp+0xC0]paddd xmm2, xmmword ptr [rsp+0x90]paddd xmm3, xmmword ptr [rsp+0xF0]paddd xmm0, xmm5paddd xmm1, xmm6paddd xmm2, xmm7paddd xmm3, xmm4pxor xmm15, xmm0pxor xmm12, xmm1pxor xmm13, xmm2pxor xmm14, xmm3movdqa xmm8, xmmword ptr [ROT16+rip]pshufb xmm15, xmm8pshufb xmm12, xmm8pshufb xmm13, xmm8pshufb xmm14, xmm8paddd xmm10, xmm15paddd xmm11, xmm12movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm13paddd xmm9, xmm14pxor xmm5, xmm10pxor xmm6, xmm11pxor xmm7, xmm8pxor xmm4, xmm9movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm5psrld xmm8, 12pslld xmm5, 20por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 12pslld xmm6, 20por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 12pslld xmm7, 20por xmm7, xmm8movdqa xmm8, xmm4psrld 
xmm8, 12pslld xmm4, 20por xmm4, xmm8paddd xmm0, xmmword ptr [rsp+0xB0]paddd xmm1, xmmword ptr [rsp+0x50]paddd xmm2, xmmword ptr [rsp+0xE0]paddd xmm3, xmmword ptr [rsp+0x80]paddd xmm0, xmm5paddd xmm1, xmm6paddd xmm2, xmm7paddd xmm3, xmm4pxor xmm15, xmm0pxor xmm12, xmm1pxor xmm13, xmm2pxor xmm14, xmm3movdqa xmm8, xmmword ptr [ROT8+rip]pshufb xmm15, xmm8pshufb xmm12, xmm8pshufb xmm13, xmm8pshufb xmm14, xmm8paddd xmm10, xmm15paddd xmm11, xmm12movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm13paddd xmm9, xmm14pxor xmm5, xmm10pxor xmm6, xmm11pxor xmm7, xmm8pxor xmm4, xmm9movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm5psrld xmm8, 7pslld xmm5, 25por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 7pslld xmm6, 25por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 7pslld xmm7, 25por xmm7, xmm8movdqa xmm8, xmm4psrld xmm8, 7pslld xmm4, 25por xmm4, xmm8paddd xmm0, xmmword ptr [rsp+0x30]paddd xmm1, xmmword ptr [rsp+0xA0]paddd xmm2, xmmword ptr [rsp+0xD0]paddd xmm3, xmmword ptr [rsp+0x70]paddd xmm0, xmm4paddd xmm1, xmm5paddd xmm2, xmm6paddd xmm3, xmm7pxor xmm12, xmm0pxor xmm13, xmm1pxor xmm14, xmm2pxor xmm15, xmm3movdqa xmm8, xmmword ptr [ROT16+rip]pshufb xmm12, xmm8pshufb xmm13, xmm8pshufb xmm14, xmm8pshufb xmm15, xmm8movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm12paddd xmm9, xmm13paddd xmm10, xmm14paddd xmm11, xmm15pxor xmm4, xmm8pxor xmm5, xmm9pxor xmm6, xmm10pxor xmm7, xmm11movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm4psrld xmm8, 12pslld xmm4, 20por xmm4, xmm8movdqa xmm8, xmm5psrld xmm8, 12pslld xmm5, 20por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 12pslld xmm6, 20por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 12pslld xmm7, 20por xmm7, xmm8paddd xmm0, xmmword ptr [rsp+0x40]paddd xmm1, xmmword ptr [rsp+0xC0]paddd xmm2, xmmword ptr [rsp+0x20]paddd xmm3, xmmword ptr [rsp+0xE0]paddd xmm0, xmm4paddd xmm1, xmm5paddd xmm2, xmm6paddd xmm3, xmm7pxor xmm12, xmm0pxor xmm13, xmm1pxor xmm14, xmm2pxor xmm15, xmm3movdqa xmm8, xmmword ptr [ROT8+rip]pshufb xmm12, xmm8pshufb xmm13, xmm8pshufb 
xmm14, xmm8pshufb xmm15, xmm8movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm12paddd xmm9, xmm13paddd xmm10, xmm14paddd xmm11, xmm15pxor xmm4, xmm8pxor xmm5, xmm9pxor xmm6, xmm10pxor xmm7, xmm11movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm4psrld xmm8, 7pslld xmm4, 25por xmm4, xmm8movdqa xmm8, xmm5psrld xmm8, 7pslld xmm5, 25por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 7pslld xmm6, 25por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 7pslld xmm7, 25por xmm7, xmm8paddd xmm0, xmmword ptr [rsp+0x60]paddd xmm1, xmmword ptr [rsp+0x90]paddd xmm2, xmmword ptr [rsp+0xB0]paddd xmm3, xmmword ptr [rsp+0x80]paddd xmm0, xmm5paddd xmm1, xmm6paddd xmm2, xmm7paddd xmm3, xmm4pxor xmm15, xmm0pxor xmm12, xmm1pxor xmm13, xmm2pxor xmm14, xmm3movdqa xmm8, xmmword ptr [ROT16+rip]pshufb xmm15, xmm8pshufb xmm12, xmm8pshufb xmm13, xmm8pshufb xmm14, xmm8paddd xmm10, xmm15paddd xmm11, xmm12movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm13paddd xmm9, xmm14pxor xmm5, xmm10pxor xmm6, xmm11pxor xmm7, xmm8pxor xmm4, xmm9movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm5psrld xmm8, 12pslld xmm5, 20por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 12pslld xmm6, 20por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 12pslld xmm7, 20por xmm7, xmm8movdqa xmm8, xmm4psrld xmm8, 12pslld xmm4, 20por xmm4, xmm8paddd xmm0, xmmword ptr [rsp+0x50]paddd xmm1, xmmword ptr [rsp]paddd xmm2, xmmword ptr [rsp+0xF0]paddd xmm3, xmmword ptr [rsp+0x10]paddd xmm0, xmm5paddd xmm1, xmm6paddd xmm2, xmm7paddd xmm3, xmm4pxor xmm15, xmm0pxor xmm12, xmm1pxor xmm13, xmm2pxor xmm14, xmm3movdqa xmm8, xmmword ptr [ROT8+rip]pshufb xmm15, xmm8pshufb xmm12, xmm8pshufb xmm13, xmm8pshufb xmm14, xmm8paddd xmm10, xmm15paddd xmm11, xmm12movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm13paddd xmm9, xmm14pxor xmm5, xmm10pxor xmm6, xmm11pxor xmm7, xmm8pxor xmm4, xmm9movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm5psrld xmm8, 7pslld xmm5, 25por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 7pslld xmm6, 25por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 7pslld 
xmm7, 25por xmm7, xmm8movdqa xmm8, xmm4psrld xmm8, 7pslld xmm4, 25por xmm4, xmm8paddd xmm0, xmmword ptr [rsp+0xA0]paddd xmm1, xmmword ptr [rsp+0xC0]paddd xmm2, xmmword ptr [rsp+0xE0]paddd xmm3, xmmword ptr [rsp+0xD0]paddd xmm0, xmm4paddd xmm1, xmm5paddd xmm2, xmm6paddd xmm3, xmm7pxor xmm12, xmm0pxor xmm13, xmm1pxor xmm14, xmm2pxor xmm15, xmm3movdqa xmm8, xmmword ptr [ROT16+rip]pshufb xmm12, xmm8pshufb xmm13, xmm8pshufb xmm14, xmm8pshufb xmm15, xmm8movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm12paddd xmm9, xmm13paddd xmm10, xmm14paddd xmm11, xmm15pxor xmm4, xmm8pxor xmm5, xmm9pxor xmm6, xmm10pxor xmm7, xmm11movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm4psrld xmm8, 12pslld xmm4, 20por xmm4, xmm8movdqa xmm8, xmm5psrld xmm8, 12pslld xmm5, 20por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 12pslld xmm6, 20por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 12pslld xmm7, 20por xmm7, xmm8paddd xmm0, xmmword ptr [rsp+0x70]paddd xmm1, xmmword ptr [rsp+0x90]paddd xmm2, xmmword ptr [rsp+0x30]paddd xmm3, xmmword ptr [rsp+0xF0]paddd xmm0, xmm4paddd xmm1, xmm5paddd xmm2, xmm6paddd xmm3, xmm7pxor xmm12, xmm0pxor xmm13, xmm1pxor xmm14, xmm2pxor xmm15, xmm3movdqa xmm8, xmmword ptr [ROT8+rip]pshufb xmm12, xmm8pshufb xmm13, xmm8pshufb xmm14, xmm8pshufb xmm15, xmm8movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm12paddd xmm9, xmm13paddd xmm10, xmm14paddd xmm11, xmm15pxor xmm4, xmm8pxor xmm5, xmm9pxor xmm6, xmm10pxor xmm7, xmm11movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm4psrld xmm8, 7pslld xmm4, 25por xmm4, xmm8movdqa xmm8, xmm5psrld xmm8, 7pslld xmm5, 25por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 7pslld xmm6, 25por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 7pslld xmm7, 25por xmm7, xmm8paddd xmm0, xmmword ptr [rsp+0x40]paddd xmm1, xmmword ptr [rsp+0xB0]paddd xmm2, xmmword ptr [rsp+0x50]paddd xmm3, xmmword ptr [rsp+0x10]paddd xmm0, xmm5paddd xmm1, xmm6paddd xmm2, xmm7paddd xmm3, xmm4pxor xmm15, xmm0pxor xmm12, xmm1pxor xmm13, xmm2pxor xmm14, xmm3movdqa xmm8, xmmword ptr 
[ROT16+rip]pshufb xmm15, xmm8pshufb xmm12, xmm8pshufb xmm13, xmm8pshufb xmm14, xmm8paddd xmm10, xmm15paddd xmm11, xmm12movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm13paddd xmm9, xmm14pxor xmm5, xmm10pxor xmm6, xmm11pxor xmm7, xmm8pxor xmm4, xmm9movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm5psrld xmm8, 12pslld xmm5, 20por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 12pslld xmm6, 20por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 12pslld xmm7, 20por xmm7, xmm8movdqa xmm8, xmm4psrld xmm8, 12pslld xmm4, 20por xmm4, xmm8paddd xmm0, xmmword ptr [rsp]paddd xmm1, xmmword ptr [rsp+0x20]paddd xmm2, xmmword ptr [rsp+0x80]paddd xmm3, xmmword ptr [rsp+0x60]paddd xmm0, xmm5paddd xmm1, xmm6paddd xmm2, xmm7paddd xmm3, xmm4pxor xmm15, xmm0pxor xmm12, xmm1pxor xmm13, xmm2pxor xmm14, xmm3movdqa xmm8, xmmword ptr [ROT8+rip]pshufb xmm15, xmm8pshufb xmm12, xmm8pshufb xmm13, xmm8pshufb xmm14, xmm8paddd xmm10, xmm15paddd xmm11, xmm12movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm13paddd xmm9, xmm14pxor xmm5, xmm10pxor xmm6, xmm11pxor xmm7, xmm8pxor xmm4, xmm9movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm5psrld xmm8, 7pslld xmm5, 25por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 7pslld xmm6, 25por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 7pslld xmm7, 25por xmm7, xmm8movdqa xmm8, xmm4psrld xmm8, 7pslld xmm4, 25por xmm4, xmm8paddd xmm0, xmmword ptr [rsp+0xC0]paddd xmm1, xmmword ptr [rsp+0x90]paddd xmm2, xmmword ptr [rsp+0xF0]paddd xmm3, xmmword ptr [rsp+0xE0]paddd xmm0, xmm4paddd xmm1, xmm5paddd xmm2, xmm6paddd xmm3, xmm7pxor xmm12, xmm0pxor xmm13, xmm1pxor xmm14, xmm2pxor xmm15, xmm3movdqa xmm8, xmmword ptr [ROT16+rip]pshufb xmm12, xmm8pshufb xmm13, xmm8pshufb xmm14, xmm8pshufb xmm15, xmm8movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm12paddd xmm9, xmm13paddd xmm10, xmm14paddd xmm11, xmm15pxor xmm4, xmm8pxor xmm5, xmm9pxor xmm6, xmm10pxor xmm7, xmm11movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm4psrld xmm8, 12pslld xmm4, 20por xmm4, xmm8movdqa xmm8, xmm5psrld xmm8, 12pslld 
xmm5, 20por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 12pslld xmm6, 20por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 12pslld xmm7, 20por xmm7, xmm8paddd xmm0, xmmword ptr [rsp+0xD0]paddd xmm1, xmmword ptr [rsp+0xB0]paddd xmm2, xmmword ptr [rsp+0xA0]paddd xmm3, xmmword ptr [rsp+0x80]paddd xmm0, xmm4paddd xmm1, xmm5paddd xmm2, xmm6paddd xmm3, xmm7pxor xmm12, xmm0pxor xmm13, xmm1pxor xmm14, xmm2pxor xmm15, xmm3movdqa xmm8, xmmword ptr [ROT8+rip]pshufb xmm12, xmm8pshufb xmm13, xmm8pshufb xmm14, xmm8pshufb xmm15, xmm8movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm12paddd xmm9, xmm13paddd xmm10, xmm14paddd xmm11, xmm15pxor xmm4, xmm8pxor xmm5, xmm9pxor xmm6, xmm10pxor xmm7, xmm11movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm4psrld xmm8, 7pslld xmm4, 25por xmm4, xmm8movdqa xmm8, xmm5psrld xmm8, 7pslld xmm5, 25por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 7pslld xmm6, 25por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 7pslld xmm7, 25por xmm7, xmm8paddd xmm0, xmmword ptr [rsp+0x70]paddd xmm1, xmmword ptr [rsp+0x50]paddd xmm2, xmmword ptr [rsp]paddd xmm3, xmmword ptr [rsp+0x60]paddd xmm0, xmm5paddd xmm1, xmm6paddd xmm2, xmm7paddd xmm3, xmm4pxor xmm15, xmm0pxor xmm12, xmm1pxor xmm13, xmm2pxor xmm14, xmm3movdqa xmm8, xmmword ptr [ROT16+rip]pshufb xmm15, xmm8pshufb xmm12, xmm8pshufb xmm13, xmm8pshufb xmm14, xmm8paddd xmm10, xmm15paddd xmm11, xmm12movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm13paddd xmm9, xmm14pxor xmm5, xmm10pxor xmm6, xmm11pxor xmm7, xmm8pxor xmm4, xmm9movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm5psrld xmm8, 12pslld xmm5, 20por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 12pslld xmm6, 20por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 12pslld xmm7, 20por xmm7, xmm8movdqa xmm8, xmm4psrld xmm8, 12pslld xmm4, 20por xmm4, xmm8paddd xmm0, xmmword ptr [rsp+0x20]paddd xmm1, xmmword ptr [rsp+0x30]paddd xmm2, xmmword ptr [rsp+0x10]paddd xmm3, xmmword ptr [rsp+0x40]paddd xmm0, xmm5paddd xmm1, xmm6paddd xmm2, xmm7paddd xmm3, xmm4pxor xmm15, xmm0pxor xmm12, xmm1pxor xmm13, 
xmm2pxor xmm14, xmm3movdqa xmm8, xmmword ptr [ROT8+rip]pshufb xmm15, xmm8pshufb xmm12, xmm8pshufb xmm13, xmm8pshufb xmm14, xmm8paddd xmm10, xmm15paddd xmm11, xmm12movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm13paddd xmm9, xmm14pxor xmm5, xmm10pxor xmm6, xmm11pxor xmm7, xmm8pxor xmm4, xmm9movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm5psrld xmm8, 7pslld xmm5, 25por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 7pslld xmm6, 25por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 7pslld xmm7, 25por xmm7, xmm8movdqa xmm8, xmm4psrld xmm8, 7pslld xmm4, 25por xmm4, xmm8paddd xmm0, xmmword ptr [rsp+0x90]paddd xmm1, xmmword ptr [rsp+0xB0]paddd xmm2, xmmword ptr [rsp+0x80]paddd xmm3, xmmword ptr [rsp+0xF0]paddd xmm0, xmm4paddd xmm1, xmm5paddd xmm2, xmm6paddd xmm3, xmm7pxor xmm12, xmm0pxor xmm13, xmm1pxor xmm14, xmm2pxor xmm15, xmm3movdqa xmm8, xmmword ptr [ROT16+rip]pshufb xmm12, xmm8pshufb xmm13, xmm8pshufb xmm14, xmm8pshufb xmm15, xmm8movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm12paddd xmm9, xmm13paddd xmm10, xmm14paddd xmm11, xmm15pxor xmm4, xmm8pxor xmm5, xmm9pxor xmm6, xmm10pxor xmm7, xmm11movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm4psrld xmm8, 12pslld xmm4, 20por xmm4, xmm8movdqa xmm8, xmm5psrld xmm8, 12pslld xmm5, 20por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 12pslld xmm6, 20por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 12pslld xmm7, 20por xmm7, xmm8paddd xmm0, xmmword ptr [rsp+0xE0]paddd xmm1, xmmword ptr [rsp+0x50]paddd xmm2, xmmword ptr [rsp+0xC0]paddd xmm3, xmmword ptr [rsp+0x10]paddd xmm0, xmm4paddd xmm1, xmm5paddd xmm2, xmm6paddd xmm3, xmm7pxor xmm12, xmm0pxor xmm13, xmm1pxor xmm14, xmm2pxor xmm15, xmm3movdqa xmm8, xmmword ptr [ROT8+rip]pshufb xmm12, xmm8pshufb xmm13, xmm8pshufb xmm14, xmm8pshufb xmm15, xmm8movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm12paddd xmm9, xmm13paddd xmm10, xmm14paddd xmm11, xmm15pxor xmm4, xmm8pxor xmm5, xmm9pxor xmm6, xmm10pxor xmm7, xmm11movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm4psrld xmm8, 7pslld xmm4, 25por 
xmm4, xmm8movdqa xmm8, xmm5psrld xmm8, 7pslld xmm5, 25por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 7pslld xmm6, 25por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 7pslld xmm7, 25por xmm7, xmm8paddd xmm0, xmmword ptr [rsp+0xD0]paddd xmm1, xmmword ptr [rsp]paddd xmm2, xmmword ptr [rsp+0x20]paddd xmm3, xmmword ptr [rsp+0x40]paddd xmm0, xmm5paddd xmm1, xmm6paddd xmm2, xmm7paddd xmm3, xmm4pxor xmm15, xmm0pxor xmm12, xmm1pxor xmm13, xmm2pxor xmm14, xmm3movdqa xmm8, xmmword ptr [ROT16+rip]pshufb xmm15, xmm8pshufb xmm12, xmm8pshufb xmm13, xmm8pshufb xmm14, xmm8paddd xmm10, xmm15paddd xmm11, xmm12movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm13paddd xmm9, xmm14pxor xmm5, xmm10pxor xmm6, xmm11pxor xmm7, xmm8pxor xmm4, xmm9movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm5psrld xmm8, 12pslld xmm5, 20por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 12pslld xmm6, 20por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 12pslld xmm7, 20por xmm7, xmm8movdqa xmm8, xmm4psrld xmm8, 12pslld xmm4, 20por xmm4, xmm8paddd xmm0, xmmword ptr [rsp+0x30]paddd xmm1, xmmword ptr [rsp+0xA0]paddd xmm2, xmmword ptr [rsp+0x60]paddd xmm3, xmmword ptr [rsp+0x70]paddd xmm0, xmm5paddd xmm1, xmm6paddd xmm2, xmm7paddd xmm3, xmm4pxor xmm15, xmm0pxor xmm12, xmm1pxor xmm13, xmm2pxor xmm14, xmm3movdqa xmm8, xmmword ptr [ROT8+rip]pshufb xmm15, xmm8pshufb xmm12, xmm8pshufb xmm13, xmm8pshufb xmm14, xmm8paddd xmm10, xmm15paddd xmm11, xmm12movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm13paddd xmm9, xmm14pxor xmm5, xmm10pxor xmm6, xmm11pxor xmm7, xmm8pxor xmm4, xmm9movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm5psrld xmm8, 7pslld xmm5, 25por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 7pslld xmm6, 25por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 7pslld xmm7, 25por xmm7, xmm8movdqa xmm8, xmm4psrld xmm8, 7pslld xmm4, 25por xmm4, xmm8paddd xmm0, xmmword ptr [rsp+0xB0]paddd xmm1, xmmword ptr [rsp+0x50]paddd xmm2, xmmword ptr [rsp+0x10]paddd xmm3, xmmword ptr [rsp+0x80]paddd xmm0, xmm4paddd xmm1, xmm5paddd xmm2, xmm6paddd xmm3, 
xmm7pxor xmm12, xmm0pxor xmm13, xmm1pxor xmm14, xmm2pxor xmm15, xmm3movdqa xmm8, xmmword ptr [ROT16+rip]pshufb xmm12, xmm8pshufb xmm13, xmm8pshufb xmm14, xmm8pshufb xmm15, xmm8movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm12paddd xmm9, xmm13paddd xmm10, xmm14paddd xmm11, xmm15pxor xmm4, xmm8pxor xmm5, xmm9pxor xmm6, xmm10pxor xmm7, xmm11movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm4psrld xmm8, 12pslld xmm4, 20por xmm4, xmm8movdqa xmm8, xmm5psrld xmm8, 12pslld xmm5, 20por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 12pslld xmm6, 20por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 12pslld xmm7, 20por xmm7, xmm8paddd xmm0, xmmword ptr [rsp+0xF0]paddd xmm1, xmmword ptr [rsp]paddd xmm2, xmmword ptr [rsp+0x90]paddd xmm3, xmmword ptr [rsp+0x60]paddd xmm0, xmm4paddd xmm1, xmm5paddd xmm2, xmm6paddd xmm3, xmm7pxor xmm12, xmm0pxor xmm13, xmm1pxor xmm14, xmm2pxor xmm15, xmm3movdqa xmm8, xmmword ptr [ROT8+rip]pshufb xmm12, xmm8pshufb xmm13, xmm8pshufb xmm14, xmm8pshufb xmm15, xmm8movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm12paddd xmm9, xmm13paddd xmm10, xmm14paddd xmm11, xmm15pxor xmm4, xmm8pxor xmm5, xmm9pxor xmm6, xmm10pxor xmm7, xmm11movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm4psrld xmm8, 7pslld xmm4, 25por xmm4, xmm8movdqa xmm8, xmm5psrld xmm8, 7pslld xmm5, 25por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 7pslld xmm6, 25por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 7pslld xmm7, 25por xmm7, xmm8paddd xmm0, xmmword ptr [rsp+0xE0]paddd xmm1, xmmword ptr [rsp+0x20]paddd xmm2, xmmword ptr [rsp+0x30]paddd xmm3, xmmword ptr [rsp+0x70]paddd xmm0, xmm5paddd xmm1, xmm6paddd xmm2, xmm7paddd xmm3, xmm4pxor xmm15, xmm0pxor xmm12, xmm1pxor xmm13, xmm2pxor xmm14, xmm3movdqa xmm8, xmmword ptr [ROT16+rip]pshufb xmm15, xmm8pshufb xmm12, xmm8pshufb xmm13, xmm8pshufb xmm14, xmm8paddd xmm10, xmm15paddd xmm11, xmm12movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm13paddd xmm9, xmm14pxor xmm5, xmm10pxor xmm6, xmm11pxor xmm7, xmm8pxor xmm4, xmm9movdqa xmmword ptr [rsp+0x100], 
xmm8movdqa xmm8, xmm5psrld xmm8, 12pslld xmm5, 20por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 12pslld xmm6, 20por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 12pslld xmm7, 20por xmm7, xmm8movdqa xmm8, xmm4psrld xmm8, 12pslld xmm4, 20por xmm4, xmm8paddd xmm0, xmmword ptr [rsp+0xA0]paddd xmm1, xmmword ptr [rsp+0xC0]paddd xmm2, xmmword ptr [rsp+0x40]paddd xmm3, xmmword ptr [rsp+0xD0]paddd xmm0, xmm5paddd xmm1, xmm6paddd xmm2, xmm7paddd xmm3, xmm4pxor xmm15, xmm0pxor xmm12, xmm1pxor xmm13, xmm2pxor xmm14, xmm3movdqa xmm8, xmmword ptr [ROT8+rip]pshufb xmm15, xmm8pshufb xmm12, xmm8pshufb xmm13, xmm8pshufb xmm14, xmm8paddd xmm10, xmm15paddd xmm11, xmm12movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm13paddd xmm9, xmm14pxor xmm5, xmm10pxor xmm6, xmm11pxor xmm7, xmm8pxor xmm4, xmm9pxor xmm0, xmm8pxor xmm1, xmm9pxor xmm2, xmm10pxor xmm3, xmm11movdqa xmm8, xmm5psrld xmm8, 7pslld xmm5, 25por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 7pslld xmm6, 25por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 7pslld xmm7, 25por xmm7, xmm8movdqa xmm8, xmm4psrld xmm8, 7pslld xmm4, 25por xmm4, xmm8pxor xmm4, xmm12pxor xmm5, xmm13pxor xmm6, xmm14pxor xmm7, xmm15mov eax, r13djne 9bmovdqa xmm9, xmm0punpckldq xmm0, xmm1punpckhdq xmm9, xmm1movdqa xmm11, xmm2punpckldq xmm2, xmm3punpckhdq xmm11, xmm3movdqa xmm1, xmm0punpcklqdq xmm0, xmm2punpckhqdq xmm1, xmm2movdqa xmm3, xmm9punpcklqdq xmm9, xmm11punpckhqdq xmm3, xmm11movdqu xmmword ptr [rbx], xmm0movdqu xmmword ptr [rbx+0x20], xmm1movdqu xmmword ptr [rbx+0x40], xmm9movdqu xmmword ptr [rbx+0x60], xmm3movdqa xmm9, xmm4punpckldq xmm4, xmm5punpckhdq xmm9, xmm5movdqa xmm11, xmm6punpckldq xmm6, xmm7punpckhdq xmm11, xmm7movdqa xmm5, xmm4punpcklqdq xmm4, xmm6punpckhqdq xmm5, xmm6movdqa xmm7, xmm9punpcklqdq xmm9, xmm11punpckhqdq xmm7, xmm11movdqu xmmword ptr [rbx+0x10], xmm4movdqu xmmword ptr [rbx+0x30], xmm5movdqu xmmword ptr [rbx+0x50], xmm9movdqu xmmword ptr [rbx+0x70], xmm7movdqa xmm1, xmmword ptr [rsp+0x110]movdqa xmm0, xmm1paddd xmm1, xmmword ptr [rsp+0x150]movdqa 
xmmword ptr [rsp+0x110], xmm1pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip]pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip]pcmpgtd xmm0, xmm1movdqa xmm1, xmmword ptr [rsp+0x120]psubd xmm1, xmm0movdqa xmmword ptr [rsp+0x120], xmm1add rbx, 128add rdi, 32sub rsi, 4cmp rsi, 4jnc 2btest rsi, rsijnz 3f4:mov rsp, rbppop rbppop rbxpop r12pop r13pop r14pop r15ret.p2align 53:test esi, 0x2je 3fmovups xmm0, xmmword ptr [rcx]movups xmm1, xmmword ptr [rcx+0x10]movaps xmm8, xmm0movaps xmm9, xmm1movd xmm13, dword ptr [rsp+0x110]pinsrd xmm13, dword ptr [rsp+0x120], 1pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2movaps xmmword ptr [rsp], xmm13movd xmm14, dword ptr [rsp+0x114]pinsrd xmm14, dword ptr [rsp+0x124], 1pinsrd xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2movaps xmmword ptr [rsp+0x10], xmm14mov r8, qword ptr [rdi]mov r9, qword ptr [rdi+0x8]movzx eax, byte ptr [rbp+0x40]or eax, r13dxor edx, edx2:mov r14d, eaxor eax, r12dadd rdx, 64cmp rdx, r15cmovne eax, r14dmovaps xmm2, xmmword ptr [BLAKE3_IV+rip]movaps xmm10, xmm2movups xmm4, xmmword ptr [r8+rdx-0x40]movups xmm5, xmmword ptr [r8+rdx-0x30]movaps xmm3, xmm4shufps xmm4, xmm5, 136shufps xmm3, xmm5, 221movaps xmm5, xmm3movups xmm6, xmmword ptr [r8+rdx-0x20]movups xmm7, xmmword ptr [r8+rdx-0x10]movaps xmm3, xmm6shufps xmm6, xmm7, 136pshufd xmm6, xmm6, 0x93shufps xmm3, xmm7, 221pshufd xmm7, xmm3, 0x93movups xmm12, xmmword ptr [r9+rdx-0x40]movups xmm13, xmmword ptr [r9+rdx-0x30]movaps xmm11, xmm12shufps xmm12, xmm13, 136shufps xmm11, xmm13, 221movaps xmm13, xmm11movups xmm14, xmmword ptr [r9+rdx-0x20]movups xmm15, xmmword ptr [r9+rdx-0x10]movaps xmm11, xmm14shufps xmm14, xmm15, 136pshufd xmm14, xmm14, 0x93shufps xmm11, xmm15, 221pshufd xmm15, xmm11, 0x93movaps xmm3, xmmword ptr [rsp]movaps xmm11, xmmword ptr [rsp+0x10]pinsrd xmm3, eax, 3pinsrd xmm11, eax, 3mov al, 79:paddd xmm0, xmm4paddd xmm8, xmm12movaps xmmword ptr [rsp+0x20], xmm4movaps xmmword ptr [rsp+0x30], xmm12paddd xmm0, xmm1paddd xmm8, xmm9pxor xmm3, xmm0pxor xmm11, xmm8movaps 
xmm12, xmmword ptr [ROT16+rip]pshufb xmm3, xmm12pshufb xmm11, xmm12paddd xmm2, xmm3paddd xmm10, xmm11pxor xmm1, xmm2pxor xmm9, xmm10movdqa xmm4, xmm1pslld xmm1, 20psrld xmm4, 12por xmm1, xmm4movdqa xmm4, xmm9pslld xmm9, 20psrld xmm4, 12por xmm9, xmm4paddd xmm0, xmm5paddd xmm8, xmm13movaps xmmword ptr [rsp+0x40], xmm5movaps xmmword ptr [rsp+0x50], xmm13paddd xmm0, xmm1paddd xmm8, xmm9pxor xmm3, xmm0pxor xmm11, xmm8movaps xmm13, xmmword ptr [ROT8+rip]pshufb xmm3, xmm13pshufb xmm11, xmm13paddd xmm2, xmm3paddd xmm10, xmm11pxor xmm1, xmm2pxor xmm9, xmm10movdqa xmm4, xmm1pslld xmm1, 25psrld xmm4, 7por xmm1, xmm4movdqa xmm4, xmm9pslld xmm9, 25psrld xmm4, 7por xmm9, xmm4pshufd xmm0, xmm0, 0x93pshufd xmm8, xmm8, 0x93pshufd xmm3, xmm3, 0x4Epshufd xmm11, xmm11, 0x4Epshufd xmm2, xmm2, 0x39pshufd xmm10, xmm10, 0x39paddd xmm0, xmm6paddd xmm8, xmm14paddd xmm0, xmm1paddd xmm8, xmm9pxor xmm3, xmm0pxor xmm11, xmm8pshufb xmm3, xmm12pshufb xmm11, xmm12paddd xmm2, xmm3paddd xmm10, xmm11pxor xmm1, xmm2pxor xmm9, xmm10movdqa xmm4, xmm1pslld xmm1, 20psrld xmm4, 12por xmm1, xmm4movdqa xmm4, xmm9pslld xmm9, 20psrld xmm4, 12por xmm9, xmm4paddd xmm0, xmm7paddd xmm8, xmm15paddd xmm0, xmm1paddd xmm8, xmm9pxor xmm3, xmm0pxor xmm11, xmm8pshufb xmm3, xmm13pshufb xmm11, xmm13paddd xmm2, xmm3paddd xmm10, xmm11pxor xmm1, xmm2pxor xmm9, xmm10movdqa xmm4, xmm1pslld xmm1, 25psrld xmm4, 7por xmm1, xmm4movdqa xmm4, xmm9pslld xmm9, 25psrld xmm4, 7por xmm9, xmm4pshufd xmm0, xmm0, 0x39pshufd xmm8, xmm8, 0x39pshufd xmm3, xmm3, 0x4Epshufd xmm11, xmm11, 0x4Epshufd xmm2, xmm2, 0x93pshufd xmm10, xmm10, 0x93dec alje 9fmovdqa xmm12, xmmword ptr [rsp+0x20]movdqa xmm5, xmmword ptr [rsp+0x40]pshufd xmm13, xmm12, 0x0Fshufps xmm12, xmm5, 214pshufd xmm4, xmm12, 0x39movdqa xmm12, xmm6shufps xmm12, xmm7, 250pblendw xmm13, xmm12, 0xCCmovdqa xmm12, xmm7punpcklqdq xmm12, xmm5pblendw xmm12, xmm6, 0xC0pshufd xmm12, xmm12, 0x78punpckhdq xmm5, xmm7punpckldq xmm6, xmm5pshufd xmm7, xmm6, 0x1Emovdqa xmmword ptr [rsp+0x20], 
xmm13movdqa xmmword ptr [rsp+0x40], xmm12movdqa xmm5, xmmword ptr [rsp+0x30]movdqa xmm13, xmmword ptr [rsp+0x50]pshufd xmm6, xmm5, 0x0Fshufps xmm5, xmm13, 214pshufd xmm12, xmm5, 0x39movdqa xmm5, xmm14shufps xmm5, xmm15, 250pblendw xmm6, xmm5, 0xCCmovdqa xmm5, xmm15punpcklqdq xmm5, xmm13pblendw xmm5, xmm14, 0xC0pshufd xmm5, xmm5, 0x78punpckhdq xmm13, xmm15punpckldq xmm14, xmm13pshufd xmm15, xmm14, 0x1Emovdqa xmm13, xmm6movdqa xmm14, xmm5movdqa xmm5, xmmword ptr [rsp+0x20]movdqa xmm6, xmmword ptr [rsp+0x40]jmp 9b9:pxor xmm0, xmm2pxor xmm1, xmm3pxor xmm8, xmm10pxor xmm9, xmm11mov eax, r13dcmp rdx, r15jne 2bmovups xmmword ptr [rbx], xmm0movups xmmword ptr [rbx+0x10], xmm1movups xmmword ptr [rbx+0x20], xmm8movups xmmword ptr [rbx+0x30], xmm9movdqa xmm0, xmmword ptr [rsp+0x130]movdqa xmm1, xmmword ptr [rsp+0x110]movdqa xmm2, xmmword ptr [rsp+0x120]movdqu xmm3, xmmword ptr [rsp+0x118]movdqu xmm4, xmmword ptr [rsp+0x128]blendvps xmm1, xmm3, xmm0blendvps xmm2, xmm4, xmm0movdqa xmmword ptr [rsp+0x110], xmm1movdqa xmmword ptr [rsp+0x120], xmm2add rdi, 16add rbx, 64sub rsi, 23:test esi, 0x1je 4bmovups xmm0, xmmword ptr [rcx]movups xmm1, xmmword ptr [rcx+0x10]movd xmm13, dword ptr [rsp+0x110]pinsrd xmm13, dword ptr [rsp+0x120], 1pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2movaps xmm14, xmmword ptr [ROT8+rip]movaps xmm15, xmmword ptr [ROT16+rip]mov r8, qword ptr [rdi]movzx eax, byte ptr [rbp+0x40]or eax, r13dxor edx, edx2:mov r14d, eaxor eax, r12dadd rdx, 64cmp rdx, r15cmovne eax, r14dmovaps xmm2, xmmword ptr [BLAKE3_IV+rip]movaps xmm3, xmm13pinsrd xmm3, eax, 3movups xmm4, xmmword ptr [r8+rdx-0x40]movups xmm5, xmmword ptr [r8+rdx-0x30]movaps xmm8, xmm4shufps xmm4, xmm5, 136shufps xmm8, xmm5, 221movaps xmm5, xmm8movups xmm6, xmmword ptr [r8+rdx-0x20]movups xmm7, xmmword ptr [r8+rdx-0x10]movaps xmm8, xmm6shufps xmm6, xmm7, 136pshufd xmm6, xmm6, 0x93shufps xmm8, xmm7, 221pshufd xmm7, xmm8, 0x93mov al, 79:paddd xmm0, xmm4paddd xmm0, xmm1pxor xmm3, xmm0pshufb xmm3, xmm15paddd 
xmm2, xmm3pxor xmm1, xmm2movdqa xmm11, xmm1pslld xmm1, 20psrld xmm11, 12por xmm1, xmm11paddd xmm0, xmm5paddd xmm0, xmm1pxor xmm3, xmm0pshufb xmm3, xmm14paddd xmm2, xmm3pxor xmm1, xmm2movdqa xmm11, xmm1pslld xmm1, 25psrld xmm11, 7por xmm1, xmm11pshufd xmm0, xmm0, 0x93pshufd xmm3, xmm3, 0x4Epshufd xmm2, xmm2, 0x39paddd xmm0, xmm6paddd xmm0, xmm1pxor xmm3, xmm0pshufb xmm3, xmm15paddd xmm2, xmm3pxor xmm1, xmm2movdqa xmm11, xmm1pslld xmm1, 20psrld xmm11, 12por xmm1, xmm11paddd xmm0, xmm7paddd xmm0, xmm1pxor xmm3, xmm0pshufb xmm3, xmm14paddd xmm2, xmm3pxor xmm1, xmm2movdqa xmm11, xmm1pslld xmm1, 25psrld xmm11, 7por xmm1, xmm11pshufd xmm0, xmm0, 0x39pshufd xmm3, xmm3, 0x4Epshufd xmm2, xmm2, 0x93dec aljz 9fmovdqa xmm8, xmm4shufps xmm8, xmm5, 214pshufd xmm9, xmm4, 0x0Fpshufd xmm4, xmm8, 0x39movdqa xmm8, xmm6shufps xmm8, xmm7, 250pblendw xmm9, xmm8, 0xCCmovdqa xmm8, xmm7punpcklqdq xmm8, xmm5pblendw xmm8, xmm6, 0xC0pshufd xmm8, xmm8, 0x78punpckhdq xmm5, xmm7punpckldq xmm6, xmm5pshufd xmm7, xmm6, 0x1Emovdqa xmm5, xmm9movdqa xmm6, xmm8jmp 9b9:pxor xmm0, xmm2pxor xmm1, xmm3mov eax, r13dcmp rdx, r15jne 2bmovups xmmword ptr [rbx], xmm0movups xmmword ptr [rbx+0x10], xmm1jmp 4b
/*
 * The single line above is the remainder of a routine whose beginning lies
 * before this point; it is reproduced exactly as found.
 */

/*
 * blake3_compress_in_place_sse41
 *
 * Single-block compression that overwrites its 32-byte chaining value.
 *
 * Register inputs, as read by the code below:
 *   rdi - pointer to 32 bytes that are loaded at entry and stored at exit
 *   rsi - pointer to the 64-byte block (four unaligned 16-byte loads)
 *   dl  - byte placed in dword 2 of the fourth state row
 *   rcx - 64-bit value placed in dwords 0-1 of the fourth state row
 *   r8b - byte placed in dword 3 of the fourth state row
 * NOTE(review): these presumably correspond to (cv, block, block_len,
 * counter, flags) in the same order as the portable C variant - confirm
 * against the C prototype.
 *
 * Working registers: xmm0..xmm3 hold the four 4-dword state rows,
 * xmm4..xmm7 hold the 16 message dwords, xmm14/xmm15 hold the ROT8/ROT16
 * byte-shuffle masks, xmm8/xmm9/xmm11 are scratch. rax is clobbered.
 */
.p2align 6
blake3_compress_in_place_sse41:
_blake3_compress_in_place_sse41:
        _CET_ENDBR
        movups  xmm0, xmmword ptr [rdi]
        movups  xmm1, xmmword ptr [rdi+0x10]
        movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]
        /*
         * Zero-extend the two byte-wide inputs before packing them into one
         * qword (dl in the low dword, r8b in the high dword). This is the
         * same sequence blake3_compress_xof_sse41 uses; without it any stale
         * upper bits in rdx or r8 would leak into the state row. The result
         * is identical whenever the caller already zero-extends.
         */
        movzx   eax, r8b
        movzx   edx, dl
        shl     rax, 32
        add     rdx, rax
        movq    xmm3, rcx
        movq    xmm4, rdx
        punpcklqdq xmm3, xmm4           /* xmm3 = { rcx lo, rcx hi, dl, r8b } */
        /* Load block dwords 0..7 and split them into even/odd lanes. */
        movups  xmm4, xmmword ptr [rsi]
        movups  xmm5, xmmword ptr [rsi+0x10]
        movaps  xmm8, xmm4
        shufps  xmm4, xmm5, 136         /* even-indexed dwords of 0..7 */
        shufps  xmm8, xmm5, 221         /* odd-indexed dwords of 0..7 */
        movaps  xmm5, xmm8
        /* Same split for dwords 8..15, followed by a one-lane rotation. */
        movups  xmm6, xmmword ptr [rsi+0x20]
        movups  xmm7, xmmword ptr [rsi+0x30]
        movaps  xmm8, xmm6
        shufps  xmm6, xmm7, 136
        pshufd  xmm6, xmm6, 0x93
        shufps  xmm8, xmm7, 221
        pshufd  xmm7, xmm8, 0x93
        movaps  xmm14, xmmword ptr [ROT8+rip]
        movaps  xmm15, xmmword ptr [ROT16+rip]
        mov     al, 7                   /* round counter: loop body runs 7 times */
9:
        /* First half of the round, all four lanes in parallel. */
        paddd   xmm0, xmm4
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15             /* rotate each dword right by 16 */
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11             /* rotate each dword right by 12 */
        paddd   xmm0, xmm5
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14             /* rotate each dword right by 8 */
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11             /* rotate each dword right by 7 */
        /* Rotate the lanes of rows 0, 3 and 2 for the second half. */
        pshufd  xmm0, xmm0, 0x93
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x39
        /* Second half of the round: same mixing steps with xmm6/xmm7. */
        paddd   xmm0, xmm6
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm7
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        /* Undo the lane rotation applied before the second half. */
        pshufd  xmm0, xmm0, 0x39
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x93
        dec     al
        jz      9f
        /* Rearrange the message dwords in xmm4..xmm7 for the next round. */
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0x0F
        pshufd  xmm4, xmm8, 0x39
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pblendw xmm9, xmm8, 0xCC
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        pblendw xmm8, xmm6, 0xC0
        pshufd  xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 0x1E
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     9b
9:
        /* Fold the lower two rows into the upper two and write them back. */
        pxor    xmm0, xmm2
        pxor    xmm1, xmm3
        movups  xmmword ptr [rdi], xmm0
        movups  xmmword ptr [rdi+0x10], xmm1
        ret

/*
 * blake3_compress_xof_sse41
 *
 * Same seven-round compression as blake3_compress_in_place_sse41, but the
 * 32 bytes at [rdi] are only read, and a 64-byte result is written to [r9]:
 *   bytes  0..31 - rows 0-1 XOR rows 2-3
 *   bytes 32..63 - rows 2-3 XOR the 32 bytes originally at [rdi]
 *
 * Register inputs: rdi, rsi, dl, rcx and r8b are used as in the in-place
 * routine; r9 is the output pointer. rax is clobbered.
 */
.p2align 6
blake3_compress_xof_sse41:
_blake3_compress_xof_sse41:
        _CET_ENDBR
        movups  xmm0, xmmword ptr [rdi]
        movups  xmm1, xmmword ptr [rdi+0x10]
        movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]
        /* Zero-extend the byte inputs, then pack: dl low dword, r8b high. */
        movzx   eax, r8b
        movzx   edx, dl
        shl     rax, 32
        add     rdx, rax
        movq    xmm3, rcx
        movq    xmm4, rdx
        punpcklqdq xmm3, xmm4           /* xmm3 = { rcx lo, rcx hi, dl, r8b } */
        /* Load block dwords 0..7 and split them into even/odd lanes. */
        movups  xmm4, xmmword ptr [rsi]
        movups  xmm5, xmmword ptr [rsi+0x10]
        movaps  xmm8, xmm4
        shufps  xmm4, xmm5, 136
        shufps  xmm8, xmm5, 221
        movaps  xmm5, xmm8
        /* Same split for dwords 8..15, followed by a one-lane rotation. */
        movups  xmm6, xmmword ptr [rsi+0x20]
        movups  xmm7, xmmword ptr [rsi+0x30]
        movaps  xmm8, xmm6
        shufps  xmm6, xmm7, 136
        pshufd  xmm6, xmm6, 0x93
        shufps  xmm8, xmm7, 221
        pshufd  xmm7, xmm8, 0x93
        movaps  xmm14, xmmword ptr [ROT8+rip]
        movaps  xmm15, xmmword ptr [ROT16+rip]
        mov     al, 7                   /* round counter: loop body runs 7 times */
9:
        /* First half of the round. */
        paddd   xmm0, xmm4
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15             /* rotate each dword right by 16 */
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11             /* rotate each dword right by 12 */
        paddd   xmm0, xmm5
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14             /* rotate each dword right by 8 */
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11             /* rotate each dword right by 7 */
        /* Rotate the lanes of rows 0, 3 and 2 for the second half. */
        pshufd  xmm0, xmm0, 0x93
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x39
        /* Second half of the round. */
        paddd   xmm0, xmm6
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm7
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        /* Undo the lane rotation applied before the second half. */
        pshufd  xmm0, xmm0, 0x39
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x93
        dec     al
        jz      9f
        /* Rearrange the message dwords in xmm4..xmm7 for the next round. */
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0x0F
        pshufd  xmm4, xmm8, 0x39
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pblendw xmm9, xmm8, 0xCC
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        pblendw xmm8, xmm6, 0xC0
        pshufd  xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 0x1E
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     9b
9:
        /* Reload the original 32 bytes at [rdi] for the upper output half. */
        movdqu  xmm4, xmmword ptr [rdi]
        movdqu  xmm5, xmmword ptr [rdi+0x10]
        pxor    xmm0, xmm2
        pxor    xmm1, xmm3
        pxor    xmm2, xmm4
        pxor    xmm3, xmm5
        movups  xmmword ptr [r9], xmm0
        movups  xmmword ptr [r9+0x10], xmm1
        movups  xmmword ptr [r9+0x20], xmm2
        movups  xmmword ptr [r9+0x30], xmm3
        ret

/* Read-only constant tables, 64-byte aligned. */
#ifdef __APPLE__
.static_data
#else
.section .rodata
#endif
.p2align 6
/* Four dwords loaded as the third state row by the compress routines. */
BLAKE3_IV:
        .long 0x6A09E667, 0xBB67AE85
        .long 0x3C6EF372, 0xA54FF53A
/* pshufb mask: rotates every 32-bit lane right by 16 bits. */
ROT16:
        .byte 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13
/* pshufb mask: rotates every 32-bit lane right by 8 bits. */
ROT8:
        .byte 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
/*
 * Per-lane vectors. NOTE(review): ADD0, ADD1 and BLAKE3_IV_0..3 are not
 * referenced by the routines in this block; presumably they serve the
 * 4-way hashing path earlier in the file - confirm.
 */
ADD0:
        .long 0, 1, 2, 3
ADD1:
        .long 4, 4, 4, 4
/* Each of the four BLAKE3_IV dwords replicated across all four lanes. */
BLAKE3_IV_0:
        .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
BLAKE3_IV_1:
        .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
BLAKE3_IV_2:
        .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
BLAKE3_IV_3:
        .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
/* The value 64 in every lane. */
BLAKE3_BLOCK_LEN:
        .long 64, 64, 64, 64
/* Sign-bit mask in every lane; XORed into operands before pcmpgtd. */
CMP_MSB_MASK:
        .long 0x80000000, 0x80000000, 0x80000000, 0x80000000
#if defined(__ELF__) && defined(__linux__).section .note.GNU-stack,"",%progbits#endif#if defined(__ELF__) && defined(__CET__) && defined(__has_include)#if __has_include(<cet.h>)#include <cet.h>#endif#endif#if !defined(_CET_ENDBR)#define _CET_ENDBR#endif.intel_syntax noprefix.global blake3_hash_many_sse2.global _blake3_hash_many_sse2.global blake3_compress_in_place_sse2.global _blake3_compress_in_place_sse2.global blake3_compress_xof_sse2.global _blake3_compress_xof_sse2#ifdef __APPLE__.text#else.section .text#endif.p2align 6_blake3_hash_many_sse2:blake3_hash_many_sse2:_CET_ENDBRpush r15push r14push r13push r12push rbxpush rbpmov rbp, rspsub rsp, 360and rsp, 0xFFFFFFFFFFFFFFC0neg r9dmovd xmm0, r9dpshufd xmm0, xmm0, 0x00movdqa xmmword ptr [rsp+0x130], xmm0movdqa xmm1, xmm0pand xmm1, xmmword ptr [ADD0+rip]pand xmm0, xmmword ptr [ADD1+rip]movdqa xmmword ptr [rsp+0x150], xmm0movd xmm0, r8dpshufd xmm0, xmm0, 0x00paddd xmm0, xmm1movdqa xmmword ptr [rsp+0x110], xmm0pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip]pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip]pcmpgtd xmm1, xmm0shr r8, 32movd xmm2, r8dpshufd xmm2, xmm2, 0x00psubd xmm2, xmm1movdqa xmmword ptr [rsp+0x120], xmm2mov rbx, qword ptr [rbp+0x50]mov r15, rdxshl r15, 6movzx r13d, byte ptr [rbp+0x38]movzx r12d, byte ptr [rbp+0x48]cmp rsi, 4jc 3f2:movdqu xmm3, xmmword ptr [rcx]pshufd xmm0, xmm3, 0x00pshufd xmm1, xmm3, 0x55pshufd xmm2, xmm3, 0xAApshufd xmm3, xmm3, 0xFFmovdqu xmm7, xmmword ptr [rcx+0x10]pshufd xmm4, xmm7, 0x00pshufd xmm5, xmm7, 0x55pshufd xmm6, xmm7, 0xAApshufd xmm7, xmm7, 0xFFmov r8, qword ptr [rdi]mov r9, qword ptr [rdi+0x8]mov r10, qword ptr [rdi+0x10]mov r11, qword ptr [rdi+0x18]movzx eax, byte ptr [rbp+0x40]or eax, r13dxor edx, edx9:mov r14d, eaxor eax, r12dadd rdx, 64cmp rdx, r15cmovne eax, r14dmovdqu xmm8, xmmword ptr [r8+rdx-0x40]movdqu xmm9, xmmword ptr [r9+rdx-0x40]movdqu xmm10, xmmword ptr [r10+rdx-0x40]movdqu xmm11, xmmword ptr [r11+rdx-0x40]movdqa xmm12, xmm8punpckldq xmm8, xmm9punpckhdq xmm12, xmm9movdqa 
xmm14, xmm10punpckldq xmm10, xmm11punpckhdq xmm14, xmm11movdqa xmm9, xmm8punpcklqdq xmm8, xmm10punpckhqdq xmm9, xmm10movdqa xmm13, xmm12punpcklqdq xmm12, xmm14punpckhqdq xmm13, xmm14movdqa xmmword ptr [rsp], xmm8movdqa xmmword ptr [rsp+0x10], xmm9movdqa xmmword ptr [rsp+0x20], xmm12movdqa xmmword ptr [rsp+0x30], xmm13movdqu xmm8, xmmword ptr [r8+rdx-0x30]movdqu xmm9, xmmword ptr [r9+rdx-0x30]movdqu xmm10, xmmword ptr [r10+rdx-0x30]movdqu xmm11, xmmword ptr [r11+rdx-0x30]movdqa xmm12, xmm8punpckldq xmm8, xmm9punpckhdq xmm12, xmm9movdqa xmm14, xmm10punpckldq xmm10, xmm11punpckhdq xmm14, xmm11movdqa xmm9, xmm8punpcklqdq xmm8, xmm10punpckhqdq xmm9, xmm10movdqa xmm13, xmm12punpcklqdq xmm12, xmm14punpckhqdq xmm13, xmm14movdqa xmmword ptr [rsp+0x40], xmm8movdqa xmmword ptr [rsp+0x50], xmm9movdqa xmmword ptr [rsp+0x60], xmm12movdqa xmmword ptr [rsp+0x70], xmm13movdqu xmm8, xmmword ptr [r8+rdx-0x20]movdqu xmm9, xmmword ptr [r9+rdx-0x20]movdqu xmm10, xmmword ptr [r10+rdx-0x20]movdqu xmm11, xmmword ptr [r11+rdx-0x20]movdqa xmm12, xmm8punpckldq xmm8, xmm9punpckhdq xmm12, xmm9movdqa xmm14, xmm10punpckldq xmm10, xmm11punpckhdq xmm14, xmm11movdqa xmm9, xmm8punpcklqdq xmm8, xmm10punpckhqdq xmm9, xmm10movdqa xmm13, xmm12punpcklqdq xmm12, xmm14punpckhqdq xmm13, xmm14movdqa xmmword ptr [rsp+0x80], xmm8movdqa xmmword ptr [rsp+0x90], xmm9movdqa xmmword ptr [rsp+0xA0], xmm12movdqa xmmword ptr [rsp+0xB0], xmm13movdqu xmm8, xmmword ptr [r8+rdx-0x10]movdqu xmm9, xmmword ptr [r9+rdx-0x10]movdqu xmm10, xmmword ptr [r10+rdx-0x10]movdqu xmm11, xmmword ptr [r11+rdx-0x10]movdqa xmm12, xmm8punpckldq xmm8, xmm9punpckhdq xmm12, xmm9movdqa xmm14, xmm10punpckldq xmm10, xmm11punpckhdq xmm14, xmm11movdqa xmm9, xmm8punpcklqdq xmm8, xmm10punpckhqdq xmm9, xmm10movdqa xmm13, xmm12punpcklqdq xmm12, xmm14punpckhqdq xmm13, xmm14movdqa xmmword ptr [rsp+0xC0], xmm8movdqa xmmword ptr [rsp+0xD0], xmm9movdqa xmmword ptr [rsp+0xE0], xmm12movdqa xmmword ptr [rsp+0xF0], xmm13movdqa xmm9, xmmword ptr 
[BLAKE3_IV_1+rip]movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip]movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip]movdqa xmm12, xmmword ptr [rsp+0x110]movdqa xmm13, xmmword ptr [rsp+0x120]movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip]movd xmm15, eaxpshufd xmm15, xmm15, 0x00prefetcht0 [r8+rdx+0x80]prefetcht0 [r9+rdx+0x80]prefetcht0 [r10+rdx+0x80]prefetcht0 [r11+rdx+0x80]paddd xmm0, xmmword ptr [rsp]paddd xmm1, xmmword ptr [rsp+0x20]paddd xmm2, xmmword ptr [rsp+0x40]paddd xmm3, xmmword ptr [rsp+0x60]paddd xmm0, xmm4paddd xmm1, xmm5paddd xmm2, xmm6paddd xmm3, xmm7pxor xmm12, xmm0pxor xmm13, xmm1pxor xmm14, xmm2pxor xmm15, xmm3pshuflw xmm12, xmm12, 0xB1pshufhw xmm12, xmm12, 0xB1pshuflw xmm13, xmm13, 0xB1pshufhw xmm13, xmm13, 0xB1pshuflw xmm14, xmm14, 0xB1pshufhw xmm14, xmm14, 0xB1pshuflw xmm15, xmm15, 0xB1pshufhw xmm15, xmm15, 0xB1movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip]paddd xmm8, xmm12paddd xmm9, xmm13paddd xmm10, xmm14paddd xmm11, xmm15pxor xmm4, xmm8pxor xmm5, xmm9pxor xmm6, xmm10pxor xmm7, xmm11movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm4psrld xmm8, 12pslld xmm4, 20por xmm4, xmm8movdqa xmm8, xmm5psrld xmm8, 12pslld xmm5, 20por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 12pslld xmm6, 20por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 12pslld xmm7, 20por xmm7, xmm8paddd xmm0, xmmword ptr [rsp+0x10]paddd xmm1, xmmword ptr [rsp+0x30]paddd xmm2, xmmword ptr [rsp+0x50]paddd xmm3, xmmword ptr [rsp+0x70]paddd xmm0, xmm4paddd xmm1, xmm5paddd xmm2, xmm6paddd xmm3, xmm7pxor xmm12, xmm0pxor xmm13, xmm1pxor xmm14, xmm2pxor xmm15, xmm3movdqa xmm8, xmm12psrld xmm12, 8pslld xmm8, 24pxor xmm12, xmm8movdqa xmm8, xmm13psrld xmm13, 8pslld xmm8, 24pxor xmm13, xmm8movdqa xmm8, xmm14psrld xmm14, 8pslld xmm8, 24pxor xmm14, xmm8movdqa xmm8, xmm15psrld xmm15, 8pslld xmm8, 24pxor xmm15, xmm8movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm12paddd xmm9, xmm13paddd xmm10, xmm14paddd xmm11, xmm15pxor xmm4, xmm8pxor xmm5, xmm9pxor xmm6, xmm10pxor xmm7, xmm11movdqa xmmword ptr [rsp+0x100], xmm8movdqa 
xmm8, xmm4psrld xmm8, 7pslld xmm4, 25por xmm4, xmm8movdqa xmm8, xmm5psrld xmm8, 7pslld xmm5, 25por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 7pslld xmm6, 25por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 7pslld xmm7, 25por xmm7, xmm8paddd xmm0, xmmword ptr [rsp+0x80]paddd xmm1, xmmword ptr [rsp+0xA0]paddd xmm2, xmmword ptr [rsp+0xC0]paddd xmm3, xmmword ptr [rsp+0xE0]paddd xmm0, xmm5paddd xmm1, xmm6paddd xmm2, xmm7paddd xmm3, xmm4pxor xmm15, xmm0pxor xmm12, xmm1pxor xmm13, xmm2pxor xmm14, xmm3pshuflw xmm15, xmm15, 0xB1pshufhw xmm15, xmm15, 0xB1pshuflw xmm12, xmm12, 0xB1pshufhw xmm12, xmm12, 0xB1pshuflw xmm13, xmm13, 0xB1pshufhw xmm13, xmm13, 0xB1pshuflw xmm14, xmm14, 0xB1pshufhw xmm14, xmm14, 0xB1paddd xmm10, xmm15paddd xmm11, xmm12movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm13paddd xmm9, xmm14pxor xmm5, xmm10pxor xmm6, xmm11pxor xmm7, xmm8pxor xmm4, xmm9movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm5psrld xmm8, 12pslld xmm5, 20por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 12pslld xmm6, 20por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 12pslld xmm7, 20por xmm7, xmm8movdqa xmm8, xmm4psrld xmm8, 12pslld xmm4, 20por xmm4, xmm8paddd xmm0, xmmword ptr [rsp+0x90]paddd xmm1, xmmword ptr [rsp+0xB0]paddd xmm2, xmmword ptr [rsp+0xD0]paddd xmm3, xmmword ptr [rsp+0xF0]paddd xmm0, xmm5paddd xmm1, xmm6paddd xmm2, xmm7paddd xmm3, xmm4pxor xmm15, xmm0pxor xmm12, xmm1pxor xmm13, xmm2pxor xmm14, xmm3movdqa xmm8, xmm15psrld xmm15, 8pslld xmm8, 24pxor xmm15, xmm8movdqa xmm8, xmm12psrld xmm12, 8pslld xmm8, 24pxor xmm12, xmm8movdqa xmm8, xmm13psrld xmm13, 8pslld xmm8, 24pxor xmm13, xmm8movdqa xmm8, xmm14psrld xmm14, 8pslld xmm8, 24pxor xmm14, xmm8paddd xmm10, xmm15paddd xmm11, xmm12movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm13paddd xmm9, xmm14pxor xmm5, xmm10pxor xmm6, xmm11pxor xmm7, xmm8pxor xmm4, xmm9movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm5psrld xmm8, 7pslld xmm5, 25por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 7pslld xmm6, 25por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 
7pslld xmm7, 25por xmm7, xmm8movdqa xmm8, xmm4psrld xmm8, 7pslld xmm4, 25por xmm4, xmm8paddd xmm0, xmmword ptr [rsp+0x20]paddd xmm1, xmmword ptr [rsp+0x30]paddd xmm2, xmmword ptr [rsp+0x70]paddd xmm3, xmmword ptr [rsp+0x40]paddd xmm0, xmm4paddd xmm1, xmm5paddd xmm2, xmm6paddd xmm3, xmm7pxor xmm12, xmm0pxor xmm13, xmm1pxor xmm14, xmm2pxor xmm15, xmm3pshuflw xmm12, xmm12, 0xB1pshufhw xmm12, xmm12, 0xB1pshuflw xmm13, xmm13, 0xB1pshufhw xmm13, xmm13, 0xB1pshuflw xmm14, xmm14, 0xB1pshufhw xmm14, xmm14, 0xB1pshuflw xmm15, xmm15, 0xB1pshufhw xmm15, xmm15, 0xB1movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm12paddd xmm9, xmm13paddd xmm10, xmm14paddd xmm11, xmm15pxor xmm4, xmm8pxor xmm5, xmm9pxor xmm6, xmm10pxor xmm7, xmm11movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm4psrld xmm8, 12pslld xmm4, 20por xmm4, xmm8movdqa xmm8, xmm5psrld xmm8, 12pslld xmm5, 20por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 12pslld xmm6, 20por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 12pslld xmm7, 20por xmm7, xmm8paddd xmm0, xmmword ptr [rsp+0x60]paddd xmm1, xmmword ptr [rsp+0xA0]paddd xmm2, xmmword ptr [rsp]paddd xmm3, xmmword ptr [rsp+0xD0]paddd xmm0, xmm4paddd xmm1, xmm5paddd xmm2, xmm6paddd xmm3, xmm7pxor xmm12, xmm0pxor xmm13, xmm1pxor xmm14, xmm2pxor xmm15, xmm3movdqa xmm8, xmm12psrld xmm12, 8pslld xmm8, 24pxor xmm12, xmm8movdqa xmm8, xmm13psrld xmm13, 8pslld xmm8, 24pxor xmm13, xmm8movdqa xmm8, xmm14psrld xmm14, 8pslld xmm8, 24pxor xmm14, xmm8movdqa xmm8, xmm15psrld xmm15, 8pslld xmm8, 24pxor xmm15, xmm8movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm12paddd xmm9, xmm13paddd xmm10, xmm14paddd xmm11, xmm15pxor xmm4, xmm8pxor xmm5, xmm9pxor xmm6, xmm10pxor xmm7, xmm11movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm4psrld xmm8, 7pslld xmm4, 25por xmm4, xmm8movdqa xmm8, xmm5psrld xmm8, 7pslld xmm5, 25por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 7pslld xmm6, 25por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 7pslld xmm7, 25por xmm7, xmm8paddd xmm0, xmmword ptr [rsp+0x10]paddd xmm1, xmmword ptr 
[rsp+0xC0]paddd xmm2, xmmword ptr [rsp+0x90]paddd xmm3, xmmword ptr [rsp+0xF0]paddd xmm0, xmm5paddd xmm1, xmm6paddd xmm2, xmm7paddd xmm3, xmm4pxor xmm15, xmm0pxor xmm12, xmm1pxor xmm13, xmm2pxor xmm14, xmm3pshuflw xmm15, xmm15, 0xB1pshufhw xmm15, xmm15, 0xB1pshuflw xmm12, xmm12, 0xB1pshufhw xmm12, xmm12, 0xB1pshuflw xmm13, xmm13, 0xB1pshufhw xmm13, xmm13, 0xB1pshuflw xmm14, xmm14, 0xB1pshufhw xmm14, xmm14, 0xB1paddd xmm10, xmm15paddd xmm11, xmm12movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm13paddd xmm9, xmm14pxor xmm5, xmm10pxor xmm6, xmm11pxor xmm7, xmm8pxor xmm4, xmm9movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm5psrld xmm8, 12pslld xmm5, 20por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 12pslld xmm6, 20por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 12pslld xmm7, 20por xmm7, xmm8movdqa xmm8, xmm4psrld xmm8, 12pslld xmm4, 20por xmm4, xmm8paddd xmm0, xmmword ptr [rsp+0xB0]paddd xmm1, xmmword ptr [rsp+0x50]paddd xmm2, xmmword ptr [rsp+0xE0]paddd xmm3, xmmword ptr [rsp+0x80]paddd xmm0, xmm5paddd xmm1, xmm6paddd xmm2, xmm7paddd xmm3, xmm4pxor xmm15, xmm0pxor xmm12, xmm1pxor xmm13, xmm2pxor xmm14, xmm3movdqa xmm8, xmm15psrld xmm15, 8pslld xmm8, 24pxor xmm15, xmm8movdqa xmm8, xmm12psrld xmm12, 8pslld xmm8, 24pxor xmm12, xmm8movdqa xmm8, xmm13psrld xmm13, 8pslld xmm8, 24pxor xmm13, xmm8movdqa xmm8, xmm14psrld xmm14, 8pslld xmm8, 24pxor xmm14, xmm8paddd xmm10, xmm15paddd xmm11, xmm12movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm13paddd xmm9, xmm14pxor xmm5, xmm10pxor xmm6, xmm11pxor xmm7, xmm8pxor xmm4, xmm9movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm5psrld xmm8, 7pslld xmm5, 25por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 7pslld xmm6, 25por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 7pslld xmm7, 25por xmm7, xmm8movdqa xmm8, xmm4psrld xmm8, 7pslld xmm4, 25por xmm4, xmm8paddd xmm0, xmmword ptr [rsp+0x30]paddd xmm1, xmmword ptr [rsp+0xA0]paddd xmm2, xmmword ptr [rsp+0xD0]paddd xmm3, xmmword ptr [rsp+0x70]paddd xmm0, xmm4paddd xmm1, xmm5paddd xmm2, xmm6paddd xmm3, 
xmm7pxor xmm12, xmm0pxor xmm13, xmm1pxor xmm14, xmm2pxor xmm15, xmm3pshuflw xmm12, xmm12, 0xB1pshufhw xmm12, xmm12, 0xB1pshuflw xmm13, xmm13, 0xB1pshufhw xmm13, xmm13, 0xB1pshuflw xmm14, xmm14, 0xB1pshufhw xmm14, xmm14, 0xB1pshuflw xmm15, xmm15, 0xB1pshufhw xmm15, xmm15, 0xB1movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm12paddd xmm9, xmm13paddd xmm10, xmm14paddd xmm11, xmm15pxor xmm4, xmm8pxor xmm5, xmm9pxor xmm6, xmm10pxor xmm7, xmm11movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm4psrld xmm8, 12pslld xmm4, 20por xmm4, xmm8movdqa xmm8, xmm5psrld xmm8, 12pslld xmm5, 20por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 12pslld xmm6, 20por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 12pslld xmm7, 20por xmm7, xmm8paddd xmm0, xmmword ptr [rsp+0x40]paddd xmm1, xmmword ptr [rsp+0xC0]paddd xmm2, xmmword ptr [rsp+0x20]paddd xmm3, xmmword ptr [rsp+0xE0]paddd xmm0, xmm4paddd xmm1, xmm5paddd xmm2, xmm6paddd xmm3, xmm7pxor xmm12, xmm0pxor xmm13, xmm1pxor xmm14, xmm2pxor xmm15, xmm3movdqa xmm8, xmm12psrld xmm12, 8pslld xmm8, 24pxor xmm12, xmm8movdqa xmm8, xmm13psrld xmm13, 8pslld xmm8, 24pxor xmm13, xmm8movdqa xmm8, xmm14psrld xmm14, 8pslld xmm8, 24pxor xmm14, xmm8movdqa xmm8, xmm15psrld xmm15, 8pslld xmm8, 24pxor xmm15, xmm8movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm12paddd xmm9, xmm13paddd xmm10, xmm14paddd xmm11, xmm15pxor xmm4, xmm8pxor xmm5, xmm9pxor xmm6, xmm10pxor xmm7, xmm11movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm4psrld xmm8, 7pslld xmm4, 25por xmm4, xmm8movdqa xmm8, xmm5psrld xmm8, 7pslld xmm5, 25por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 7pslld xmm6, 25por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 7pslld xmm7, 25por xmm7, xmm8paddd xmm0, xmmword ptr [rsp+0x60]paddd xmm1, xmmword ptr [rsp+0x90]paddd xmm2, xmmword ptr [rsp+0xB0]paddd xmm3, xmmword ptr [rsp+0x80]paddd xmm0, xmm5paddd xmm1, xmm6paddd xmm2, xmm7paddd xmm3, xmm4pxor xmm15, xmm0pxor xmm12, xmm1pxor xmm13, xmm2pxor xmm14, xmm3pshuflw xmm15, xmm15, 0xB1pshufhw xmm15, xmm15, 0xB1pshuflw xmm12, 
xmm12, 0xB1pshufhw xmm12, xmm12, 0xB1pshuflw xmm13, xmm13, 0xB1pshufhw xmm13, xmm13, 0xB1pshuflw xmm14, xmm14, 0xB1pshufhw xmm14, xmm14, 0xB1paddd xmm10, xmm15paddd xmm11, xmm12movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm13paddd xmm9, xmm14pxor xmm5, xmm10pxor xmm6, xmm11pxor xmm7, xmm8pxor xmm4, xmm9movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm5psrld xmm8, 12pslld xmm5, 20por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 12pslld xmm6, 20por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 12pslld xmm7, 20por xmm7, xmm8movdqa xmm8, xmm4psrld xmm8, 12pslld xmm4, 20por xmm4, xmm8paddd xmm0, xmmword ptr [rsp+0x50]paddd xmm1, xmmword ptr [rsp]paddd xmm2, xmmword ptr [rsp+0xF0]paddd xmm3, xmmword ptr [rsp+0x10]paddd xmm0, xmm5paddd xmm1, xmm6paddd xmm2, xmm7paddd xmm3, xmm4pxor xmm15, xmm0pxor xmm12, xmm1pxor xmm13, xmm2pxor xmm14, xmm3movdqa xmm8, xmm15psrld xmm15, 8pslld xmm8, 24pxor xmm15, xmm8movdqa xmm8, xmm12psrld xmm12, 8pslld xmm8, 24pxor xmm12, xmm8movdqa xmm8, xmm13psrld xmm13, 8pslld xmm8, 24pxor xmm13, xmm8movdqa xmm8, xmm14psrld xmm14, 8pslld xmm8, 24pxor xmm14, xmm8paddd xmm10, xmm15paddd xmm11, xmm12movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm13paddd xmm9, xmm14pxor xmm5, xmm10pxor xmm6, xmm11pxor xmm7, xmm8pxor xmm4, xmm9movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm5psrld xmm8, 7pslld xmm5, 25por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 7pslld xmm6, 25por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 7pslld xmm7, 25por xmm7, xmm8movdqa xmm8, xmm4psrld xmm8, 7pslld xmm4, 25por xmm4, xmm8paddd xmm0, xmmword ptr [rsp+0xA0]paddd xmm1, xmmword ptr [rsp+0xC0]paddd xmm2, xmmword ptr [rsp+0xE0]paddd xmm3, xmmword ptr [rsp+0xD0]paddd xmm0, xmm4paddd xmm1, xmm5paddd xmm2, xmm6paddd xmm3, xmm7pxor xmm12, xmm0pxor xmm13, xmm1pxor xmm14, xmm2pxor xmm15, xmm3pshuflw xmm12, xmm12, 0xB1pshufhw xmm12, xmm12, 0xB1pshuflw xmm13, xmm13, 0xB1pshufhw xmm13, xmm13, 0xB1pshuflw xmm14, xmm14, 0xB1pshufhw xmm14, xmm14, 0xB1pshuflw xmm15, xmm15, 0xB1pshufhw xmm15, xmm15, 
0xB1movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm12paddd xmm9, xmm13paddd xmm10, xmm14paddd xmm11, xmm15pxor xmm4, xmm8pxor xmm5, xmm9pxor xmm6, xmm10pxor xmm7, xmm11movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm4psrld xmm8, 12pslld xmm4, 20por xmm4, xmm8movdqa xmm8, xmm5psrld xmm8, 12pslld xmm5, 20por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 12pslld xmm6, 20por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 12pslld xmm7, 20por xmm7, xmm8paddd xmm0, xmmword ptr [rsp+0x70]paddd xmm1, xmmword ptr [rsp+0x90]paddd xmm2, xmmword ptr [rsp+0x30]paddd xmm3, xmmword ptr [rsp+0xF0]paddd xmm0, xmm4paddd xmm1, xmm5paddd xmm2, xmm6paddd xmm3, xmm7pxor xmm12, xmm0pxor xmm13, xmm1pxor xmm14, xmm2pxor xmm15, xmm3movdqa xmm8, xmm12psrld xmm12, 8pslld xmm8, 24pxor xmm12, xmm8movdqa xmm8, xmm13psrld xmm13, 8pslld xmm8, 24pxor xmm13, xmm8movdqa xmm8, xmm14psrld xmm14, 8pslld xmm8, 24pxor xmm14, xmm8movdqa xmm8, xmm15psrld xmm15, 8pslld xmm8, 24pxor xmm15, xmm8movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm12paddd xmm9, xmm13paddd xmm10, xmm14paddd xmm11, xmm15pxor xmm4, xmm8pxor xmm5, xmm9pxor xmm6, xmm10pxor xmm7, xmm11movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm4psrld xmm8, 7pslld xmm4, 25por xmm4, xmm8movdqa xmm8, xmm5psrld xmm8, 7pslld xmm5, 25por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 7pslld xmm6, 25por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 7pslld xmm7, 25por xmm7, xmm8paddd xmm0, xmmword ptr [rsp+0x40]paddd xmm1, xmmword ptr [rsp+0xB0]paddd xmm2, xmmword ptr [rsp+0x50]paddd xmm3, xmmword ptr [rsp+0x10]paddd xmm0, xmm5paddd xmm1, xmm6paddd xmm2, xmm7paddd xmm3, xmm4pxor xmm15, xmm0pxor xmm12, xmm1pxor xmm13, xmm2pxor xmm14, xmm3pshuflw xmm15, xmm15, 0xB1pshufhw xmm15, xmm15, 0xB1pshuflw xmm12, xmm12, 0xB1pshufhw xmm12, xmm12, 0xB1pshuflw xmm13, xmm13, 0xB1pshufhw xmm13, xmm13, 0xB1pshuflw xmm14, xmm14, 0xB1pshufhw xmm14, xmm14, 0xB1paddd xmm10, xmm15paddd xmm11, xmm12movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm13paddd xmm9, xmm14pxor xmm5, xmm10pxor xmm6, 
xmm11pxor xmm7, xmm8pxor xmm4, xmm9movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm5psrld xmm8, 12pslld xmm5, 20por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 12pslld xmm6, 20por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 12pslld xmm7, 20por xmm7, xmm8movdqa xmm8, xmm4psrld xmm8, 12pslld xmm4, 20por xmm4, xmm8paddd xmm0, xmmword ptr [rsp]paddd xmm1, xmmword ptr [rsp+0x20]paddd xmm2, xmmword ptr [rsp+0x80]paddd xmm3, xmmword ptr [rsp+0x60]paddd xmm0, xmm5paddd xmm1, xmm6paddd xmm2, xmm7paddd xmm3, xmm4pxor xmm15, xmm0pxor xmm12, xmm1pxor xmm13, xmm2pxor xmm14, xmm3movdqa xmm8, xmm15psrld xmm15, 8pslld xmm8, 24pxor xmm15, xmm8movdqa xmm8, xmm12psrld xmm12, 8pslld xmm8, 24pxor xmm12, xmm8movdqa xmm8, xmm13psrld xmm13, 8pslld xmm8, 24pxor xmm13, xmm8movdqa xmm8, xmm14psrld xmm14, 8pslld xmm8, 24pxor xmm14, xmm8paddd xmm10, xmm15paddd xmm11, xmm12movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm13paddd xmm9, xmm14pxor xmm5, xmm10pxor xmm6, xmm11pxor xmm7, xmm8pxor xmm4, xmm9movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm5psrld xmm8, 7pslld xmm5, 25por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 7pslld xmm6, 25por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 7pslld xmm7, 25por xmm7, xmm8movdqa xmm8, xmm4psrld xmm8, 7pslld xmm4, 25por xmm4, xmm8paddd xmm0, xmmword ptr [rsp+0xC0]paddd xmm1, xmmword ptr [rsp+0x90]paddd xmm2, xmmword ptr [rsp+0xF0]paddd xmm3, xmmword ptr [rsp+0xE0]paddd xmm0, xmm4paddd xmm1, xmm5paddd xmm2, xmm6paddd xmm3, xmm7pxor xmm12, xmm0pxor xmm13, xmm1pxor xmm14, xmm2pxor xmm15, xmm3pshuflw xmm12, xmm12, 0xB1pshufhw xmm12, xmm12, 0xB1pshuflw xmm13, xmm13, 0xB1pshufhw xmm13, xmm13, 0xB1pshuflw xmm14, xmm14, 0xB1pshufhw xmm14, xmm14, 0xB1pshuflw xmm15, xmm15, 0xB1pshufhw xmm15, xmm15, 0xB1movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm12paddd xmm9, xmm13paddd xmm10, xmm14paddd xmm11, xmm15pxor xmm4, xmm8pxor xmm5, xmm9pxor xmm6, xmm10pxor xmm7, xmm11movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm4psrld xmm8, 12pslld xmm4, 20por xmm4, xmm8movdqa xmm8, 
xmm5psrld xmm8, 12pslld xmm5, 20por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 12pslld xmm6, 20por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 12pslld xmm7, 20por xmm7, xmm8paddd xmm0, xmmword ptr [rsp+0xD0]paddd xmm1, xmmword ptr [rsp+0xB0]paddd xmm2, xmmword ptr [rsp+0xA0]paddd xmm3, xmmword ptr [rsp+0x80]paddd xmm0, xmm4paddd xmm1, xmm5paddd xmm2, xmm6paddd xmm3, xmm7pxor xmm12, xmm0pxor xmm13, xmm1pxor xmm14, xmm2pxor xmm15, xmm3movdqa xmm8, xmm12psrld xmm12, 8pslld xmm8, 24pxor xmm12, xmm8movdqa xmm8, xmm13psrld xmm13, 8pslld xmm8, 24pxor xmm13, xmm8movdqa xmm8, xmm14psrld xmm14, 8pslld xmm8, 24pxor xmm14, xmm8movdqa xmm8, xmm15psrld xmm15, 8pslld xmm8, 24pxor xmm15, xmm8movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm12paddd xmm9, xmm13paddd xmm10, xmm14paddd xmm11, xmm15pxor xmm4, xmm8pxor xmm5, xmm9pxor xmm6, xmm10pxor xmm7, xmm11movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm4psrld xmm8, 7pslld xmm4, 25por xmm4, xmm8movdqa xmm8, xmm5psrld xmm8, 7pslld xmm5, 25por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 7pslld xmm6, 25por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 7pslld xmm7, 25por xmm7, xmm8paddd xmm0, xmmword ptr [rsp+0x70]paddd xmm1, xmmword ptr [rsp+0x50]paddd xmm2, xmmword ptr [rsp]paddd xmm3, xmmword ptr [rsp+0x60]paddd xmm0, xmm5paddd xmm1, xmm6paddd xmm2, xmm7paddd xmm3, xmm4pxor xmm15, xmm0pxor xmm12, xmm1pxor xmm13, xmm2pxor xmm14, xmm3pshuflw xmm15, xmm15, 0xB1pshufhw xmm15, xmm15, 0xB1pshuflw xmm12, xmm12, 0xB1pshufhw xmm12, xmm12, 0xB1pshuflw xmm13, xmm13, 0xB1pshufhw xmm13, xmm13, 0xB1pshuflw xmm14, xmm14, 0xB1pshufhw xmm14, xmm14, 0xB1paddd xmm10, xmm15paddd xmm11, xmm12movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm13paddd xmm9, xmm14pxor xmm5, xmm10pxor xmm6, xmm11pxor xmm7, xmm8pxor xmm4, xmm9movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm5psrld xmm8, 12pslld xmm5, 20por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 12pslld xmm6, 20por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 12pslld xmm7, 20por xmm7, xmm8movdqa xmm8, xmm4psrld xmm8, 12pslld 
xmm4, 20por xmm4, xmm8paddd xmm0, xmmword ptr [rsp+0x20]paddd xmm1, xmmword ptr [rsp+0x30]paddd xmm2, xmmword ptr [rsp+0x10]paddd xmm3, xmmword ptr [rsp+0x40]paddd xmm0, xmm5paddd xmm1, xmm6paddd xmm2, xmm7paddd xmm3, xmm4pxor xmm15, xmm0pxor xmm12, xmm1pxor xmm13, xmm2pxor xmm14, xmm3movdqa xmm8, xmm15psrld xmm15, 8pslld xmm8, 24pxor xmm15, xmm8movdqa xmm8, xmm12psrld xmm12, 8pslld xmm8, 24pxor xmm12, xmm8movdqa xmm8, xmm13psrld xmm13, 8pslld xmm8, 24pxor xmm13, xmm8movdqa xmm8, xmm14psrld xmm14, 8pslld xmm8, 24pxor xmm14, xmm8paddd xmm10, xmm15paddd xmm11, xmm12movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm13paddd xmm9, xmm14pxor xmm5, xmm10pxor xmm6, xmm11pxor xmm7, xmm8pxor xmm4, xmm9movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm5psrld xmm8, 7pslld xmm5, 25por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 7pslld xmm6, 25por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 7pslld xmm7, 25por xmm7, xmm8movdqa xmm8, xmm4psrld xmm8, 7pslld xmm4, 25por xmm4, xmm8paddd xmm0, xmmword ptr [rsp+0x90]paddd xmm1, xmmword ptr [rsp+0xB0]paddd xmm2, xmmword ptr [rsp+0x80]paddd xmm3, xmmword ptr [rsp+0xF0]paddd xmm0, xmm4paddd xmm1, xmm5paddd xmm2, xmm6paddd xmm3, xmm7pxor xmm12, xmm0pxor xmm13, xmm1pxor xmm14, xmm2pxor xmm15, xmm3pshuflw xmm12, xmm12, 0xB1pshufhw xmm12, xmm12, 0xB1pshuflw xmm13, xmm13, 0xB1pshufhw xmm13, xmm13, 0xB1pshuflw xmm14, xmm14, 0xB1pshufhw xmm14, xmm14, 0xB1pshuflw xmm15, xmm15, 0xB1pshufhw xmm15, xmm15, 0xB1movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm12paddd xmm9, xmm13paddd xmm10, xmm14paddd xmm11, xmm15pxor xmm4, xmm8pxor xmm5, xmm9pxor xmm6, xmm10pxor xmm7, xmm11movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm4psrld xmm8, 12pslld xmm4, 20por xmm4, xmm8movdqa xmm8, xmm5psrld xmm8, 12pslld xmm5, 20por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 12pslld xmm6, 20por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 12pslld xmm7, 20por xmm7, xmm8paddd xmm0, xmmword ptr [rsp+0xE0]paddd xmm1, xmmword ptr [rsp+0x50]paddd xmm2, xmmword ptr [rsp+0xC0]paddd xmm3, 
xmmword ptr [rsp+0x10]paddd xmm0, xmm4paddd xmm1, xmm5paddd xmm2, xmm6paddd xmm3, xmm7pxor xmm12, xmm0pxor xmm13, xmm1pxor xmm14, xmm2pxor xmm15, xmm3movdqa xmm8, xmm12psrld xmm12, 8pslld xmm8, 24pxor xmm12, xmm8movdqa xmm8, xmm13psrld xmm13, 8pslld xmm8, 24pxor xmm13, xmm8movdqa xmm8, xmm14psrld xmm14, 8pslld xmm8, 24pxor xmm14, xmm8movdqa xmm8, xmm15psrld xmm15, 8pslld xmm8, 24pxor xmm15, xmm8movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm12paddd xmm9, xmm13paddd xmm10, xmm14paddd xmm11, xmm15pxor xmm4, xmm8pxor xmm5, xmm9pxor xmm6, xmm10pxor xmm7, xmm11movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm4psrld xmm8, 7pslld xmm4, 25por xmm4, xmm8movdqa xmm8, xmm5psrld xmm8, 7pslld xmm5, 25por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 7pslld xmm6, 25por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 7pslld xmm7, 25por xmm7, xmm8paddd xmm0, xmmword ptr [rsp+0xD0]paddd xmm1, xmmword ptr [rsp]paddd xmm2, xmmword ptr [rsp+0x20]paddd xmm3, xmmword ptr [rsp+0x40]paddd xmm0, xmm5paddd xmm1, xmm6paddd xmm2, xmm7paddd xmm3, xmm4pxor xmm15, xmm0pxor xmm12, xmm1pxor xmm13, xmm2pxor xmm14, xmm3pshuflw xmm15, xmm15, 0xB1pshufhw xmm15, xmm15, 0xB1pshuflw xmm12, xmm12, 0xB1pshufhw xmm12, xmm12, 0xB1pshuflw xmm13, xmm13, 0xB1pshufhw xmm13, xmm13, 0xB1pshuflw xmm14, xmm14, 0xB1pshufhw xmm14, xmm14, 0xB1paddd xmm10, xmm15paddd xmm11, xmm12movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm13paddd xmm9, xmm14pxor xmm5, xmm10pxor xmm6, xmm11pxor xmm7, xmm8pxor xmm4, xmm9movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm5psrld xmm8, 12pslld xmm5, 20por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 12pslld xmm6, 20por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 12pslld xmm7, 20por xmm7, xmm8movdqa xmm8, xmm4psrld xmm8, 12pslld xmm4, 20por xmm4, xmm8paddd xmm0, xmmword ptr [rsp+0x30]paddd xmm1, xmmword ptr [rsp+0xA0]paddd xmm2, xmmword ptr [rsp+0x60]paddd xmm3, xmmword ptr [rsp+0x70]paddd xmm0, xmm5paddd xmm1, xmm6paddd xmm2, xmm7paddd xmm3, xmm4pxor xmm15, xmm0pxor xmm12, xmm1pxor xmm13, xmm2pxor 
xmm14, xmm3movdqa xmm8, xmm15psrld xmm15, 8pslld xmm8, 24pxor xmm15, xmm8movdqa xmm8, xmm12psrld xmm12, 8pslld xmm8, 24pxor xmm12, xmm8movdqa xmm8, xmm13psrld xmm13, 8pslld xmm8, 24pxor xmm13, xmm8movdqa xmm8, xmm14psrld xmm14, 8pslld xmm8, 24pxor xmm14, xmm8paddd xmm10, xmm15paddd xmm11, xmm12movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm13paddd xmm9, xmm14pxor xmm5, xmm10pxor xmm6, xmm11pxor xmm7, xmm8pxor xmm4, xmm9movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm5psrld xmm8, 7pslld xmm5, 25por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 7pslld xmm6, 25por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 7pslld xmm7, 25por xmm7, xmm8movdqa xmm8, xmm4psrld xmm8, 7pslld xmm4, 25por xmm4, xmm8paddd xmm0, xmmword ptr [rsp+0xB0]paddd xmm1, xmmword ptr [rsp+0x50]paddd xmm2, xmmword ptr [rsp+0x10]paddd xmm3, xmmword ptr [rsp+0x80]paddd xmm0, xmm4paddd xmm1, xmm5paddd xmm2, xmm6paddd xmm3, xmm7pxor xmm12, xmm0pxor xmm13, xmm1pxor xmm14, xmm2pxor xmm15, xmm3pshuflw xmm12, xmm12, 0xB1pshufhw xmm12, xmm12, 0xB1pshuflw xmm13, xmm13, 0xB1pshufhw xmm13, xmm13, 0xB1pshuflw xmm14, xmm14, 0xB1pshufhw xmm14, xmm14, 0xB1pshuflw xmm15, xmm15, 0xB1pshufhw xmm15, xmm15, 0xB1movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm12paddd xmm9, xmm13paddd xmm10, xmm14paddd xmm11, xmm15pxor xmm4, xmm8pxor xmm5, xmm9pxor xmm6, xmm10pxor xmm7, xmm11movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm4psrld xmm8, 12pslld xmm4, 20por xmm4, xmm8movdqa xmm8, xmm5psrld xmm8, 12pslld xmm5, 20por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 12pslld xmm6, 20por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 12pslld xmm7, 20por xmm7, xmm8paddd xmm0, xmmword ptr [rsp+0xF0]paddd xmm1, xmmword ptr [rsp]paddd xmm2, xmmword ptr [rsp+0x90]paddd xmm3, xmmword ptr [rsp+0x60]paddd xmm0, xmm4paddd xmm1, xmm5paddd xmm2, xmm6paddd xmm3, xmm7pxor xmm12, xmm0pxor xmm13, xmm1pxor xmm14, xmm2pxor xmm15, xmm3movdqa xmm8, xmm12psrld xmm12, 8pslld xmm8, 24pxor xmm12, xmm8movdqa xmm8, xmm13psrld xmm13, 8pslld xmm8, 24pxor xmm13, xmm8movdqa 
xmm8, xmm14psrld xmm14, 8pslld xmm8, 24pxor xmm14, xmm8movdqa xmm8, xmm15psrld xmm15, 8pslld xmm8, 24pxor xmm15, xmm8movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm12paddd xmm9, xmm13paddd xmm10, xmm14paddd xmm11, xmm15pxor xmm4, xmm8pxor xmm5, xmm9pxor xmm6, xmm10pxor xmm7, xmm11movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm4psrld xmm8, 7pslld xmm4, 25por xmm4, xmm8movdqa xmm8, xmm5psrld xmm8, 7pslld xmm5, 25por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 7pslld xmm6, 25por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 7pslld xmm7, 25por xmm7, xmm8paddd xmm0, xmmword ptr [rsp+0xE0]paddd xmm1, xmmword ptr [rsp+0x20]paddd xmm2, xmmword ptr [rsp+0x30]paddd xmm3, xmmword ptr [rsp+0x70]paddd xmm0, xmm5paddd xmm1, xmm6paddd xmm2, xmm7paddd xmm3, xmm4pxor xmm15, xmm0pxor xmm12, xmm1pxor xmm13, xmm2pxor xmm14, xmm3pshuflw xmm15, xmm15, 0xB1pshufhw xmm15, xmm15, 0xB1pshuflw xmm12, xmm12, 0xB1pshufhw xmm12, xmm12, 0xB1pshuflw xmm13, xmm13, 0xB1pshufhw xmm13, xmm13, 0xB1pshuflw xmm14, xmm14, 0xB1pshufhw xmm14, xmm14, 0xB1paddd xmm10, xmm15paddd xmm11, xmm12movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm13paddd xmm9, xmm14pxor xmm5, xmm10pxor xmm6, xmm11pxor xmm7, xmm8pxor xmm4, xmm9movdqa xmmword ptr [rsp+0x100], xmm8movdqa xmm8, xmm5psrld xmm8, 12pslld xmm5, 20por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 12pslld xmm6, 20por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 12pslld xmm7, 20por xmm7, xmm8movdqa xmm8, xmm4psrld xmm8, 12pslld xmm4, 20por xmm4, xmm8paddd xmm0, xmmword ptr [rsp+0xA0]paddd xmm1, xmmword ptr [rsp+0xC0]paddd xmm2, xmmword ptr [rsp+0x40]paddd xmm3, xmmword ptr [rsp+0xD0]paddd xmm0, xmm5paddd xmm1, xmm6paddd xmm2, xmm7paddd xmm3, xmm4pxor xmm15, xmm0pxor xmm12, xmm1pxor xmm13, xmm2pxor xmm14, xmm3movdqa xmm8, xmm15psrld xmm15, 8pslld xmm8, 24pxor xmm15, xmm8movdqa xmm8, xmm12psrld xmm12, 8pslld xmm8, 24pxor xmm12, xmm8movdqa xmm8, xmm13psrld xmm13, 8pslld xmm8, 24pxor xmm13, xmm8movdqa xmm8, xmm14psrld xmm14, 8pslld xmm8, 24pxor xmm14, xmm8paddd xmm10, 
xmm15paddd xmm11, xmm12movdqa xmm8, xmmword ptr [rsp+0x100]paddd xmm8, xmm13paddd xmm9, xmm14pxor xmm5, xmm10pxor xmm6, xmm11pxor xmm7, xmm8pxor xmm4, xmm9pxor xmm0, xmm8pxor xmm1, xmm9pxor xmm2, xmm10pxor xmm3, xmm11movdqa xmm8, xmm5psrld xmm8, 7pslld xmm5, 25por xmm5, xmm8movdqa xmm8, xmm6psrld xmm8, 7pslld xmm6, 25por xmm6, xmm8movdqa xmm8, xmm7psrld xmm8, 7pslld xmm7, 25por xmm7, xmm8movdqa xmm8, xmm4psrld xmm8, 7pslld xmm4, 25por xmm4, xmm8pxor xmm4, xmm12pxor xmm5, xmm13pxor xmm6, xmm14pxor xmm7, xmm15mov eax, r13djne 9bmovdqa xmm9, xmm0punpckldq xmm0, xmm1punpckhdq xmm9, xmm1movdqa xmm11, xmm2punpckldq xmm2, xmm3punpckhdq xmm11, xmm3movdqa xmm1, xmm0punpcklqdq xmm0, xmm2punpckhqdq xmm1, xmm2movdqa xmm3, xmm9punpcklqdq xmm9, xmm11punpckhqdq xmm3, xmm11movdqu xmmword ptr [rbx], xmm0movdqu xmmword ptr [rbx+0x20], xmm1movdqu xmmword ptr [rbx+0x40], xmm9movdqu xmmword ptr [rbx+0x60], xmm3movdqa xmm9, xmm4punpckldq xmm4, xmm5punpckhdq xmm9, xmm5movdqa xmm11, xmm6punpckldq xmm6, xmm7punpckhdq xmm11, xmm7movdqa xmm5, xmm4punpcklqdq xmm4, xmm6punpckhqdq xmm5, xmm6movdqa xmm7, xmm9punpcklqdq xmm9, xmm11punpckhqdq xmm7, xmm11movdqu xmmword ptr [rbx+0x10], xmm4movdqu xmmword ptr [rbx+0x30], xmm5movdqu xmmword ptr [rbx+0x50], xmm9movdqu xmmword ptr [rbx+0x70], xmm7movdqa xmm1, xmmword ptr [rsp+0x110]movdqa xmm0, xmm1paddd xmm1, xmmword ptr [rsp+0x150]movdqa xmmword ptr [rsp+0x110], xmm1pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip]pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip]pcmpgtd xmm0, xmm1movdqa xmm1, xmmword ptr [rsp+0x120]psubd xmm1, xmm0movdqa xmmword ptr [rsp+0x120], xmm1add rbx, 128add rdi, 32sub rsi, 4cmp rsi, 4jnc 2btest rsi, rsijnz 3f4:mov rsp, rbppop rbppop rbxpop r12pop r13pop r14pop r15ret.p2align 53:test esi, 0x2je 3fmovups xmm0, xmmword ptr [rcx]movups xmm1, xmmword ptr [rcx+0x10]movaps xmm8, xmm0movaps xmm9, xmm1movd xmm13, dword ptr [rsp+0x110]movd xmm14, dword ptr [rsp+0x120]punpckldq xmm13, xmm14movaps xmmword ptr [rsp], xmm13movd xmm14, dword ptr 
[rsp+0x114]movd xmm13, dword ptr [rsp+0x124]punpckldq xmm14, xmm13movaps xmmword ptr [rsp+0x10], xmm14mov r8, qword ptr [rdi]mov r9, qword ptr [rdi+0x8]movzx eax, byte ptr [rbp+0x40]or eax, r13dxor edx, edx2:mov r14d, eaxor eax, r12dadd rdx, 64cmp rdx, r15cmovne eax, r14dmovaps xmm2, xmmword ptr [BLAKE3_IV+rip]movaps xmm10, xmm2movups xmm4, xmmword ptr [r8+rdx-0x40]movups xmm5, xmmword ptr [r8+rdx-0x30]movaps xmm3, xmm4shufps xmm4, xmm5, 136shufps xmm3, xmm5, 221movaps xmm5, xmm3movups xmm6, xmmword ptr [r8+rdx-0x20]movups xmm7, xmmword ptr [r8+rdx-0x10]movaps xmm3, xmm6shufps xmm6, xmm7, 136pshufd xmm6, xmm6, 0x93shufps xmm3, xmm7, 221pshufd xmm7, xmm3, 0x93movups xmm12, xmmword ptr [r9+rdx-0x40]movups xmm13, xmmword ptr [r9+rdx-0x30]movaps xmm11, xmm12shufps xmm12, xmm13, 136shufps xmm11, xmm13, 221movaps xmm13, xmm11movups xmm14, xmmword ptr [r9+rdx-0x20]movups xmm15, xmmword ptr [r9+rdx-0x10]movaps xmm11, xmm14shufps xmm14, xmm15, 136pshufd xmm14, xmm14, 0x93shufps xmm11, xmm15, 221pshufd xmm15, xmm11, 0x93shl rax, 0x20or rax, 0x40movq xmm3, raxmovdqa xmmword ptr [rsp+0x20], xmm3movaps xmm3, xmmword ptr [rsp]movaps xmm11, xmmword ptr [rsp+0x10]punpcklqdq xmm3, xmmword ptr [rsp+0x20]punpcklqdq xmm11, xmmword ptr [rsp+0x20]mov al, 79:paddd xmm0, xmm4paddd xmm8, xmm12movaps xmmword ptr [rsp+0x20], xmm4movaps xmmword ptr [rsp+0x30], xmm12paddd xmm0, xmm1paddd xmm8, xmm9pxor xmm3, xmm0pxor xmm11, xmm8pshuflw xmm3, xmm3, 0xB1pshufhw xmm3, xmm3, 0xB1pshuflw xmm11, xmm11, 0xB1pshufhw xmm11, xmm11, 0xB1paddd xmm2, xmm3paddd xmm10, xmm11pxor xmm1, xmm2pxor xmm9, xmm10movdqa xmm4, xmm1pslld xmm1, 20psrld xmm4, 12por xmm1, xmm4movdqa xmm4, xmm9pslld xmm9, 20psrld xmm4, 12por xmm9, xmm4paddd xmm0, xmm5paddd xmm8, xmm13movaps xmmword ptr [rsp+0x40], xmm5movaps xmmword ptr [rsp+0x50], xmm13paddd xmm0, xmm1paddd xmm8, xmm9pxor xmm3, xmm0pxor xmm11, xmm8movdqa xmm13, xmm3psrld xmm3, 8pslld xmm13, 24pxor xmm3, xmm13movdqa xmm13, xmm11psrld xmm11, 8pslld xmm13, 24pxor xmm11, 
xmm13paddd xmm2, xmm3paddd xmm10, xmm11pxor xmm1, xmm2pxor xmm9, xmm10movdqa xmm4, xmm1pslld xmm1, 25psrld xmm4, 7por xmm1, xmm4movdqa xmm4, xmm9pslld xmm9, 25psrld xmm4, 7por xmm9, xmm4pshufd xmm0, xmm0, 0x93pshufd xmm8, xmm8, 0x93pshufd xmm3, xmm3, 0x4Epshufd xmm11, xmm11, 0x4Epshufd xmm2, xmm2, 0x39pshufd xmm10, xmm10, 0x39paddd xmm0, xmm6paddd xmm8, xmm14paddd xmm0, xmm1paddd xmm8, xmm9pxor xmm3, xmm0pxor xmm11, xmm8pshuflw xmm3, xmm3, 0xB1pshufhw xmm3, xmm3, 0xB1pshuflw xmm11, xmm11, 0xB1pshufhw xmm11, xmm11, 0xB1paddd xmm2, xmm3paddd xmm10, xmm11pxor xmm1, xmm2pxor xmm9, xmm10movdqa xmm4, xmm1pslld xmm1, 20psrld xmm4, 12por xmm1, xmm4movdqa xmm4, xmm9pslld xmm9, 20psrld xmm4, 12por xmm9, xmm4paddd xmm0, xmm7paddd xmm8, xmm15paddd xmm0, xmm1paddd xmm8, xmm9pxor xmm3, xmm0pxor xmm11, xmm8movdqa xmm13, xmm3psrld xmm3, 8pslld xmm13, 24pxor xmm3, xmm13movdqa xmm13, xmm11psrld xmm11, 8pslld xmm13, 24pxor xmm11, xmm13paddd xmm2, xmm3paddd xmm10, xmm11pxor xmm1, xmm2pxor xmm9, xmm10movdqa xmm4, xmm1pslld xmm1, 25psrld xmm4, 7por xmm1, xmm4movdqa xmm4, xmm9pslld xmm9, 25psrld xmm4, 7por xmm9, xmm4pshufd xmm0, xmm0, 0x39pshufd xmm8, xmm8, 0x39pshufd xmm3, xmm3, 0x4Epshufd xmm11, xmm11, 0x4Epshufd xmm2, xmm2, 0x93pshufd xmm10, xmm10, 0x93dec alje 9fmovdqa xmm12, xmmword ptr [rsp+0x20]movdqa xmm5, xmmword ptr [rsp+0x40]pshufd xmm13, xmm12, 0x0Fshufps xmm12, xmm5, 214pshufd xmm4, xmm12, 0x39movdqa xmm12, xmm6shufps xmm12, xmm7, 250pand xmm13, xmmword ptr [PBLENDW_0x33_MASK+rip]pand xmm12, xmmword ptr [PBLENDW_0xCC_MASK+rip]por xmm13, xmm12movdqa xmmword ptr [rsp+0x20], xmm13movdqa xmm12, xmm7punpcklqdq xmm12, xmm5movdqa xmm13, xmm6pand xmm12, xmmword ptr [PBLENDW_0x3F_MASK+rip]pand xmm13, xmmword ptr [PBLENDW_0xC0_MASK+rip]por xmm12, xmm13pshufd xmm12, xmm12, 0x78punpckhdq xmm5, xmm7punpckldq xmm6, xmm5pshufd xmm7, xmm6, 0x1Emovdqa xmmword ptr [rsp+0x40], xmm12movdqa xmm5, xmmword ptr [rsp+0x30]movdqa xmm13, xmmword ptr [rsp+0x50]pshufd xmm6, xmm5, 0x0Fshufps xmm5, xmm13, 
214pshufd xmm12, xmm5, 0x39movdqa xmm5, xmm14shufps xmm5, xmm15, 250pand xmm6, xmmword ptr [PBLENDW_0x33_MASK+rip]pand xmm5, xmmword ptr [PBLENDW_0xCC_MASK+rip]por xmm6, xmm5movdqa xmm5, xmm15punpcklqdq xmm5, xmm13movdqa xmmword ptr [rsp+0x30], xmm2movdqa xmm2, xmm14pand xmm5, xmmword ptr [PBLENDW_0x3F_MASK+rip]pand xmm2, xmmword ptr [PBLENDW_0xC0_MASK+rip]por xmm5, xmm2movdqa xmm2, xmmword ptr [rsp+0x30]pshufd xmm5, xmm5, 0x78punpckhdq xmm13, xmm15punpckldq xmm14, xmm13pshufd xmm15, xmm14, 0x1Emovdqa xmm13, xmm6movdqa xmm14, xmm5movdqa xmm5, xmmword ptr [rsp+0x20]movdqa xmm6, xmmword ptr [rsp+0x40]jmp 9b9:pxor xmm0, xmm2pxor xmm1, xmm3pxor xmm8, xmm10pxor xmm9, xmm11mov eax, r13dcmp rdx, r15jne 2bmovups xmmword ptr [rbx], xmm0movups xmmword ptr [rbx+0x10], xmm1movups xmmword ptr [rbx+0x20], xmm8movups xmmword ptr [rbx+0x30], xmm9mov eax, dword ptr [rsp+0x130]neg eaxmov r10d, dword ptr [rsp+0x110+8*rax]mov r11d, dword ptr [rsp+0x120+8*rax]mov dword ptr [rsp+0x110], r10dmov dword ptr [rsp+0x120], r11dadd rdi, 16add rbx, 64sub rsi, 23:test esi, 0x1je 4bmovups xmm0, xmmword ptr [rcx]movups xmm1, xmmword ptr [rcx+0x10]movd xmm13, dword ptr [rsp+0x110]movd xmm14, dword ptr [rsp+0x120]punpckldq xmm13, xmm14mov r8, qword ptr [rdi]movzx eax, byte ptr [rbp+0x40]or eax, r13dxor edx, edx2:mov r14d, eaxor eax, r12dadd rdx, 64cmp rdx, r15cmovne eax, r14dmovaps xmm2, xmmword ptr [BLAKE3_IV+rip]shl rax, 32or rax, 64movq xmm12, raxmovdqa xmm3, xmm13punpcklqdq xmm3, xmm12movups xmm4, xmmword ptr [r8+rdx-0x40]movups xmm5, xmmword ptr [r8+rdx-0x30]movaps xmm8, xmm4shufps xmm4, xmm5, 136shufps xmm8, xmm5, 221movaps xmm5, xmm8movups xmm6, xmmword ptr [r8+rdx-0x20]movups xmm7, xmmword ptr [r8+rdx-0x10]movaps xmm8, xmm6shufps xmm6, xmm7, 136pshufd xmm6, xmm6, 0x93shufps xmm8, xmm7, 221pshufd xmm7, xmm8, 0x93mov al, 79:paddd xmm0, xmm4paddd xmm0, xmm1pxor xmm3, xmm0pshuflw xmm3, xmm3, 0xB1pshufhw xmm3, xmm3, 0xB1paddd xmm2, xmm3pxor xmm1, xmm2movdqa xmm11, xmm1pslld xmm1, 20psrld xmm11, 
12por xmm1, xmm11paddd xmm0, xmm5paddd xmm0, xmm1pxor xmm3, xmm0movdqa xmm14, xmm3psrld xmm3, 8pslld xmm14, 24pxor xmm3, xmm14paddd xmm2, xmm3pxor xmm1, xmm2movdqa xmm11, xmm1pslld xmm1, 25psrld xmm11, 7por xmm1, xmm11pshufd xmm0, xmm0, 0x93pshufd xmm3, xmm3, 0x4Epshufd xmm2, xmm2, 0x39paddd xmm0, xmm6paddd xmm0, xmm1pxor xmm3, xmm0pshuflw xmm3, xmm3, 0xB1pshufhw xmm3, xmm3, 0xB1paddd xmm2, xmm3pxor xmm1, xmm2movdqa xmm11, xmm1pslld xmm1, 20psrld xmm11, 12por xmm1, xmm11paddd xmm0, xmm7paddd xmm0, xmm1pxor xmm3, xmm0movdqa xmm14, xmm3psrld xmm3, 8pslld xmm14, 24pxor xmm3, xmm14paddd xmm2, xmm3pxor xmm1, xmm2movdqa xmm11, xmm1pslld xmm1, 25psrld xmm11, 7por xmm1, xmm11pshufd xmm0, xmm0, 0x39pshufd xmm3, xmm3, 0x4Epshufd xmm2, xmm2, 0x93dec aljz 9fmovdqa xmm8, xmm4shufps xmm8, xmm5, 214pshufd xmm9, xmm4, 0x0Fpshufd xmm4, xmm8, 0x39movdqa xmm8, xmm6shufps xmm8, xmm7, 250pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]por xmm9, xmm8movdqa xmm8, xmm7punpcklqdq xmm8, xmm5movdqa xmm10, xmm6pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]por xmm8, xmm10pshufd xmm8, xmm8, 0x78punpckhdq xmm5, xmm7punpckldq xmm6, xmm5pshufd xmm7, xmm6, 0x1Emovdqa xmm5, xmm9movdqa xmm6, xmm8jmp 9b9:pxor xmm0, xmm2pxor xmm1, xmm3mov eax, r13dcmp rdx, r15jne 2bmovups xmmword ptr [rbx], xmm0movups xmmword ptr [rbx+0x10], xmm1jmp 4b
/*
 * The line above is the tail of the preceding routine, whose entry point lies
 * before this span. It is carried through unchanged.
 */

/*
 * blake3_compress_in_place_sse2
 *
 * Runs the 7-round BLAKE3 compression on one 64-byte block and overwrites the
 * 32-byte chaining value with the result.
 *
 * Registers read on entry:
 *   rdi  pointer to the 32-byte chaining value (read, then overwritten)
 *   rsi  pointer to the 64-byte message block (unaligned loads)
 *   dl   block length   (only the low byte of rdx is used)
 *   rcx  64-bit value forming lanes 0-1 of the fourth state row
 *   r8b  flags          (only the low byte of r8 is used)
 * This matches the parameter order of blake3_compress_in_place_portable under
 * the System V AMD64 convention -- presumably the same prototype; confirm
 * against blake3_impl.h.
 *
 * State layout, one row per register: xmm0 = cv[0..3], xmm1 = cv[4..7],
 * xmm2 = BLAKE3_IV, xmm3 = { rcx low, rcx high, block_len, flags }.
 * Message words: xmm4/xmm5 hold the even/odd dwords of the first 32 bytes;
 * xmm6/xmm7 hold the even/odd dwords of the last 32 bytes, each rotated by
 * one lane (pshufd 0x93).
 *
 * Clobbers rax, rdx, xmm0-xmm11 and xmm14.
 */
.p2align 6
blake3_compress_in_place_sse2:
_blake3_compress_in_place_sse2:
        _CET_ENDBR
        movups xmm0, xmmword ptr [rdi]
        movups xmm1, xmmword ptr [rdi+0x10]
        movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
        /*
         * block_len and flags are 8-bit arguments, so the register bits above
         * them are not relied upon: zero-extend both before packing them into
         * one qword (block_len in the low dword, flags in the high dword).
         * This is the same sequence blake3_compress_xof_sse2 uses.
         */
        movzx eax, r8b
        movzx edx, dl
        shl rax, 32
        add rdx, rax
        movq xmm3, rcx
        movq xmm4, rdx
        punpcklqdq xmm3, xmm4
        /* Load the block and split it into even-indexed / odd-indexed words. */
        movups xmm4, xmmword ptr [rsi]
        movups xmm5, xmmword ptr [rsi+0x10]
        movaps xmm8, xmm4
        shufps xmm4, xmm5, 136
        shufps xmm8, xmm5, 221
        movaps xmm5, xmm8
        movups xmm6, xmmword ptr [rsi+0x20]
        movups xmm7, xmmword ptr [rsi+0x30]
        movaps xmm8, xmm6
        shufps xmm6, xmm7, 136
        pshufd xmm6, xmm6, 0x93
        shufps xmm8, xmm7, 221
        pshufd xmm7, xmm8, 0x93
        mov al, 7                       /* round counter */
9:
        /* First half-round: the four columns in parallel. */
        paddd xmm0, xmm4
        paddd xmm0, xmm1
        pxor xmm3, xmm0
        pshuflw xmm3, xmm3, 0xB1        /* swap 16-bit halves of each dword: */
        pshufhw xmm3, xmm3, 0xB1        /* rotate by 16                      */
        paddd xmm2, xmm3
        pxor xmm1, xmm2
        movdqa xmm11, xmm1
        pslld xmm1, 20
        psrld xmm11, 12
        por xmm1, xmm11                 /* rotate right by 12 */
        paddd xmm0, xmm5
        paddd xmm0, xmm1
        pxor xmm3, xmm0
        movdqa xmm14, xmm3
        psrld xmm3, 8
        pslld xmm14, 24
        pxor xmm3, xmm14                /* rotate right by 8 */
        paddd xmm2, xmm3
        pxor xmm1, xmm2
        movdqa xmm11, xmm1
        pslld xmm1, 25
        psrld xmm11, 7
        por xmm1, xmm11                 /* rotate right by 7 */
        /* Rotate the lanes of rows 0, 3 and 2 so the next pass mixes diagonals. */
        pshufd xmm0, xmm0, 0x93
        pshufd xmm3, xmm3, 0x4E
        pshufd xmm2, xmm2, 0x39
        /* Second half-round: same mixing, now on the diagonals. */
        paddd xmm0, xmm6
        paddd xmm0, xmm1
        pxor xmm3, xmm0
        pshuflw xmm3, xmm3, 0xB1
        pshufhw xmm3, xmm3, 0xB1
        paddd xmm2, xmm3
        pxor xmm1, xmm2
        movdqa xmm11, xmm1
        pslld xmm1, 20
        psrld xmm11, 12
        por xmm1, xmm11
        paddd xmm0, xmm7
        paddd xmm0, xmm1
        pxor xmm3, xmm0
        movdqa xmm14, xmm3
        psrld xmm3, 8
        pslld xmm14, 24
        pxor xmm3, xmm14
        paddd xmm2, xmm3
        pxor xmm1, xmm2
        movdqa xmm11, xmm1
        pslld xmm1, 25
        psrld xmm11, 7
        por xmm1, xmm11
        /* Undo the lane rotation. */
        pshufd xmm0, xmm0, 0x39
        pshufd xmm3, xmm3, 0x4E
        pshufd xmm2, xmm2, 0x93
        dec al
        jz 9f                           /* all 7 rounds done */
        /*
         * Permute the message words in xmm4-xmm7 for the next round. The
         * pand/pand/por triples select lanes through the PBLENDW_*_MASK
         * constants, i.e. the lanes a pblendw with that immediate would pick.
         */
        movdqa xmm8, xmm4
        shufps xmm8, xmm5, 214
        pshufd xmm9, xmm4, 0x0F
        pshufd xmm4, xmm8, 0x39
        movdqa xmm8, xmm6
        shufps xmm8, xmm7, 250
        pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]
        pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]
        por xmm9, xmm8
        movdqa xmm8, xmm7
        punpcklqdq xmm8, xmm5
        movdqa xmm10, xmm6
        pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
        pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
        por xmm8, xmm10
        pshufd xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd xmm7, xmm6, 0x1E
        movdqa xmm5, xmm9
        movdqa xmm6, xmm8
        jmp 9b
9:
        /* New chaining value = upper state rows XOR lower state rows. */
        pxor xmm0, xmm2
        pxor xmm1, xmm3
        movups xmmword ptr [rdi], xmm0
        movups xmmword ptr [rdi+0x10], xmm1
        ret

/*
 * blake3_compress_xof_sse2
 *
 * Same compression as blake3_compress_in_place_sse2, but the chaining value
 * at [rdi] is left untouched and the full 64-byte result is written to [r9]:
 *   bytes  0..31  rows 0-1 XOR rows 2-3
 *   bytes 32..63  rows 2-3 XOR the original chaining value
 *
 * Registers read on entry: rdi (32-byte chaining value), rsi (64-byte block),
 * dl (block length), rcx (64-bit value for lanes 0-1 of row 3), r8b (flags),
 * r9 (64-byte output buffer, unaligned stores).
 *
 * Clobbers rax, rdx, xmm0-xmm11 and xmm14.
 */
.p2align 6
blake3_compress_xof_sse2:
_blake3_compress_xof_sse2:
        _CET_ENDBR
        movups xmm0, xmmword ptr [rdi]
        movups xmm1, xmmword ptr [rdi+0x10]
        movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
        /* Zero-extend the 8-bit block_len / flags and pack them into rdx. */
        movzx eax, r8b
        movzx edx, dl
        shl rax, 32
        add rdx, rax
        movq xmm3, rcx
        movq xmm4, rdx
        punpcklqdq xmm3, xmm4
        /* Load the block and split it into even-indexed / odd-indexed words. */
        movups xmm4, xmmword ptr [rsi]
        movups xmm5, xmmword ptr [rsi+0x10]
        movaps xmm8, xmm4
        shufps xmm4, xmm5, 136
        shufps xmm8, xmm5, 221
        movaps xmm5, xmm8
        movups xmm6, xmmword ptr [rsi+0x20]
        movups xmm7, xmmword ptr [rsi+0x30]
        movaps xmm8, xmm6
        shufps xmm6, xmm7, 136
        pshufd xmm6, xmm6, 0x93
        shufps xmm8, xmm7, 221
        pshufd xmm7, xmm8, 0x93
        mov al, 7                       /* round counter */
9:
        /* First half-round (columns). */
        paddd xmm0, xmm4
        paddd xmm0, xmm1
        pxor xmm3, xmm0
        pshuflw xmm3, xmm3, 0xB1
        pshufhw xmm3, xmm3, 0xB1        /* rotate by 16 */
        paddd xmm2, xmm3
        pxor xmm1, xmm2
        movdqa xmm11, xmm1
        pslld xmm1, 20
        psrld xmm11, 12
        por xmm1, xmm11                 /* rotate right by 12 */
        paddd xmm0, xmm5
        paddd xmm0, xmm1
        pxor xmm3, xmm0
        movdqa xmm14, xmm3
        psrld xmm3, 8
        pslld xmm14, 24
        pxor xmm3, xmm14                /* rotate right by 8 */
        paddd xmm2, xmm3
        pxor xmm1, xmm2
        movdqa xmm11, xmm1
        pslld xmm1, 25
        psrld xmm11, 7
        por xmm1, xmm11                 /* rotate right by 7 */
        /* Rotate lanes so the second half-round mixes diagonals. */
        pshufd xmm0, xmm0, 0x93
        pshufd xmm3, xmm3, 0x4E
        pshufd xmm2, xmm2, 0x39
        /* Second half-round (diagonals). */
        paddd xmm0, xmm6
        paddd xmm0, xmm1
        pxor xmm3, xmm0
        pshuflw xmm3, xmm3, 0xB1
        pshufhw xmm3, xmm3, 0xB1
        paddd xmm2, xmm3
        pxor xmm1, xmm2
        movdqa xmm11, xmm1
        pslld xmm1, 20
        psrld xmm11, 12
        por xmm1, xmm11
        paddd xmm0, xmm7
        paddd xmm0, xmm1
        pxor xmm3, xmm0
        movdqa xmm14, xmm3
        psrld xmm3, 8
        pslld xmm14, 24
        pxor xmm3, xmm14
        paddd xmm2, xmm3
        pxor xmm1, xmm2
        movdqa xmm11, xmm1
        pslld xmm1, 25
        psrld xmm11, 7
        por xmm1, xmm11
        /* Undo the lane rotation. */
        pshufd xmm0, xmm0, 0x39
        pshufd xmm3, xmm3, 0x4E
        pshufd xmm2, xmm2, 0x93
        dec al
        jz 9f                           /* all 7 rounds done */
        /* Permute the message words in xmm4-xmm7 for the next round. */
        movdqa xmm8, xmm4
        shufps xmm8, xmm5, 214
        pshufd xmm9, xmm4, 0x0F
        pshufd xmm4, xmm8, 0x39
        movdqa xmm8, xmm6
        shufps xmm8, xmm7, 250
        pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]
        pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]
        por xmm9, xmm8
        movdqa xmm8, xmm7
        punpcklqdq xmm8, xmm5
        movdqa xmm10, xmm6
        pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
        pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
        por xmm8, xmm10
        pshufd xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd xmm7, xmm6, 0x1E
        movdqa xmm5, xmm9
        movdqa xmm6, xmm8
        jmp 9b
9:
        /* Reload the input chaining value for the second output half. */
        movdqu xmm4, xmmword ptr [rdi]
        movdqu xmm5, xmmword ptr [rdi+0x10]
        pxor xmm0, xmm2
        pxor xmm1, xmm3
        pxor xmm2, xmm4
        pxor xmm3, xmm5
        movups xmmword ptr [r9], xmm0
        movups xmmword ptr [r9+0x10], xmm1
        movups xmmword ptr [r9+0x20], xmm2
        movups xmmword ptr [r9+0x30], xmm3
        ret

/*
 * Read-only constants for the SSE2 routines, aligned to 64 bytes. Every table
 * is four 32-bit lanes (one XMM register). They are reached RIP-relative, and
 * the movaps load of BLAKE3_IV requires its 16-byte alignment.
 */
#ifdef __APPLE__
.static_data
#else
.section .rodata
#endif
.p2align 6
/* The four IV words loaded into state row 2 by the compress routines. */
BLAKE3_IV:
        .long 0x6A09E667, 0xBB67AE85
        .long 0x3C6EF372, 0xA54FF53A
/* Lane offsets 0..3, and the constant 4 in every lane. */
ADD0:
        .long 0, 1, 2, 3
ADD1:
        .long 4, 4, 4, 4
/* Each IV word broadcast to all four lanes. */
BLAKE3_IV_0:
        .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
BLAKE3_IV_1:
        .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
BLAKE3_IV_2:
        .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
BLAKE3_IV_3:
        .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
/* The value 64 in every lane. */
BLAKE3_BLOCK_LEN:
        .long 64, 64, 64, 64
/*
 * Sign-bit mask. The 4-way loop XORs it into both operands before pcmpgtd,
 * which turns the signed compare into an unsigned one.
 */
CMP_MSB_MASK:
        .long 0x80000000, 0x80000000, 0x80000000, 0x80000000
/*
 * Lane-select masks for pand/por blends. Each keeps the dwords that pblendw
 * with the immediate in its name would select.
 */
PBLENDW_0x33_MASK:
        .long 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000
PBLENDW_0xCC_MASK:
        .long 0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF
PBLENDW_0x3F_MASK:
        .long 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000
PBLENDW_0xC0_MASK:
        .long 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF
#if defined(__ELF__) && defined(__linux__).section .note.GNU-stack,"",%progbits#endif#if defined(__ELF__) && defined(__CET__) && defined(__has_include)#if __has_include(<cet.h>)#include <cet.h>#endif#endif#if !defined(_CET_ENDBR)#define _CET_ENDBR#endif.intel_syntax noprefix.global _blake3_hash_many_avx512.global blake3_hash_many_avx512.global blake3_compress_in_place_avx512.global _blake3_compress_in_place_avx512.global blake3_compress_xof_avx512.global _blake3_compress_xof_avx512#ifdef __APPLE__.text#else.section .text#endif.p2align 6_blake3_hash_many_avx512:blake3_hash_many_avx512:_CET_ENDBRpush r15push r14push r13push r12push rbxpush rbpmov rbp, rspsub rsp, 144and rsp, 0xFFFFFFFFFFFFFFC0neg r9kmovw k1, r9dvmovd xmm0, r8dvpbroadcastd ymm0, xmm0shr r8, 32vmovd xmm1, r8dvpbroadcastd ymm1, xmm1vmovdqa ymm4, ymm1vmovdqa ymm5, ymm1vpaddd ymm2, ymm0, ymmword ptr [ADD0+rip]vpaddd ymm3, ymm0, ymmword ptr [ADD0+32+rip]vpcmpltud k2, ymm2, ymm0vpcmpltud k3, ymm3, ymm0vpaddd ymm4 {k2}, ymm4, dword ptr [ADD1+rip] {1to8}vpaddd ymm5 {k3}, ymm5, dword ptr [ADD1+rip] {1to8}knotw k2, k1vmovdqa32 ymm2 {k2}, ymm0vmovdqa32 ymm3 {k2}, ymm0vmovdqa32 ymm4 {k2}, ymm1vmovdqa32 ymm5 {k2}, ymm1vmovdqa ymmword ptr [rsp], ymm2vmovdqa ymmword ptr [rsp+0x1*0x20], ymm3vmovdqa ymmword ptr [rsp+0x2*0x20], ymm4vmovdqa ymmword ptr [rsp+0x3*0x20], ymm5shl rdx, 6mov qword ptr [rsp+0x80], rdxcmp rsi, 16jc 3f2:vpbroadcastd zmm0, dword ptr [rcx]vpbroadcastd zmm1, dword ptr [rcx+0x1*0x4]vpbroadcastd zmm2, dword ptr [rcx+0x2*0x4]vpbroadcastd zmm3, dword ptr [rcx+0x3*0x4]vpbroadcastd zmm4, dword ptr [rcx+0x4*0x4]vpbroadcastd zmm5, dword ptr [rcx+0x5*0x4]vpbroadcastd zmm6, dword ptr [rcx+0x6*0x4]vpbroadcastd zmm7, dword ptr [rcx+0x7*0x4]movzx eax, byte ptr [rbp+0x38]movzx ebx, byte ptr [rbp+0x40]or eax, ebxxor edx, edx.p2align 59:movzx ebx, byte ptr [rbp+0x48]or ebx, eaxadd rdx, 64cmp rdx, qword ptr [rsp+0x80]cmove eax, ebxmov dword ptr [rsp+0x88], eaxmov r8, qword ptr [rdi]mov r9, qword ptr [rdi+0x8]mov 
r10, qword ptr [rdi+0x10]mov r11, qword ptr [rdi+0x18]mov r12, qword ptr [rdi+0x40]mov r13, qword ptr [rdi+0x48]mov r14, qword ptr [rdi+0x50]mov r15, qword ptr [rdi+0x58]vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01vpunpcklqdq zmm8, zmm16, zmm17vpunpckhqdq zmm9, zmm16, zmm17vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01vpunpcklqdq zmm10, zmm18, zmm19vpunpckhqdq zmm11, zmm18, zmm19mov r8, qword ptr [rdi+0x20]mov r9, qword ptr [rdi+0x28]mov r10, qword ptr [rdi+0x30]mov r11, qword ptr [rdi+0x38]mov r12, qword ptr [rdi+0x60]mov r13, qword ptr [rdi+0x68]mov r14, qword ptr [rdi+0x70]mov r15, qword ptr [rdi+0x78]vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01vpunpcklqdq zmm12, zmm16, zmm17vpunpckhqdq zmm13, zmm16, zmm17vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01vpunpcklqdq zmm14, zmm18, zmm19vpunpckhqdq zmm15, zmm18, zmm19vmovdqa32 zmm27, zmmword ptr [INDEX0+rip]vmovdqa32 zmm31, zmmword ptr [INDEX1+rip]vshufps zmm16, zmm8, zmm10, 136vshufps zmm17, zmm12, zmm14, 136vmovdqa32 zmm20, zmm16vpermt2d zmm16, zmm27, zmm17vpermt2d zmm20, zmm31, zmm17vshufps zmm17, zmm8, zmm10, 221vshufps zmm30, zmm12, zmm14, 221vmovdqa32 zmm21, zmm17vpermt2d zmm17, zmm27, zmm30vpermt2d zmm21, zmm31, zmm30vshufps zmm18, zmm9, zmm11, 136vshufps zmm8, zmm13, zmm15, 136vmovdqa32 zmm22, zmm18vpermt2d zmm18, zmm27, 
zmm8vpermt2d zmm22, zmm31, zmm8vshufps zmm19, zmm9, zmm11, 221vshufps zmm8, zmm13, zmm15, 221vmovdqa32 zmm23, zmm19vpermt2d zmm19, zmm27, zmm8vpermt2d zmm23, zmm31, zmm8mov r8, qword ptr [rdi]mov r9, qword ptr [rdi+0x8]mov r10, qword ptr [rdi+0x10]mov r11, qword ptr [rdi+0x18]mov r12, qword ptr [rdi+0x40]mov r13, qword ptr [rdi+0x48]mov r14, qword ptr [rdi+0x50]mov r15, qword ptr [rdi+0x58]vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01vpunpcklqdq zmm8, zmm24, zmm25vpunpckhqdq zmm9, zmm24, zmm25vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01vpunpcklqdq zmm10, zmm24, zmm25vpunpckhqdq zmm11, zmm24, zmm25prefetcht0 [r8+rdx+0x80]prefetcht0 [r12+rdx+0x80]prefetcht0 [r9+rdx+0x80]prefetcht0 [r13+rdx+0x80]prefetcht0 [r10+rdx+0x80]prefetcht0 [r14+rdx+0x80]prefetcht0 [r11+rdx+0x80]prefetcht0 [r15+rdx+0x80]mov r8, qword ptr [rdi+0x20]mov r9, qword ptr [rdi+0x28]mov r10, qword ptr [rdi+0x30]mov r11, qword ptr [rdi+0x38]mov r12, qword ptr [rdi+0x60]mov r13, qword ptr [rdi+0x68]mov r14, qword ptr [rdi+0x70]mov r15, qword ptr [rdi+0x78]vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01vpunpcklqdq zmm12, zmm24, zmm25vpunpckhqdq zmm13, zmm24, zmm25vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01vpunpcklqdq zmm14, zmm24, zmm25vpunpckhqdq zmm15, zmm24, zmm25prefetcht0 [r8+rdx+0x80]prefetcht0 
[r12+rdx+0x80]prefetcht0 [r9+rdx+0x80]prefetcht0 [r13+rdx+0x80]prefetcht0 [r10+rdx+0x80]prefetcht0 [r14+rdx+0x80]prefetcht0 [r11+rdx+0x80]prefetcht0 [r15+rdx+0x80]vshufps zmm24, zmm8, zmm10, 136vshufps zmm30, zmm12, zmm14, 136vmovdqa32 zmm28, zmm24vpermt2d zmm24, zmm27, zmm30vpermt2d zmm28, zmm31, zmm30vshufps zmm25, zmm8, zmm10, 221vshufps zmm30, zmm12, zmm14, 221vmovdqa32 zmm29, zmm25vpermt2d zmm25, zmm27, zmm30vpermt2d zmm29, zmm31, zmm30vshufps zmm26, zmm9, zmm11, 136vshufps zmm8, zmm13, zmm15, 136vmovdqa32 zmm30, zmm26vpermt2d zmm26, zmm27, zmm8vpermt2d zmm30, zmm31, zmm8vshufps zmm8, zmm9, zmm11, 221vshufps zmm10, zmm13, zmm15, 221vpermi2d zmm27, zmm8, zmm10vpermi2d zmm31, zmm8, zmm10vpbroadcastd zmm8, dword ptr [BLAKE3_IV_0+rip]vpbroadcastd zmm9, dword ptr [BLAKE3_IV_1+rip]vpbroadcastd zmm10, dword ptr [BLAKE3_IV_2+rip]vpbroadcastd zmm11, dword ptr [BLAKE3_IV_3+rip]vmovdqa32 zmm12, zmmword ptr [rsp]vmovdqa32 zmm13, zmmword ptr [rsp+0x1*0x40]vpbroadcastd zmm14, dword ptr [BLAKE3_BLOCK_LEN+rip]vpbroadcastd zmm15, dword ptr [rsp+0x22*0x4]vpaddd zmm0, zmm0, zmm16vpaddd zmm1, zmm1, zmm18vpaddd zmm2, zmm2, zmm20vpaddd zmm3, zmm3, zmm22vpaddd zmm0, zmm0, zmm4vpaddd zmm1, zmm1, zmm5vpaddd zmm2, zmm2, zmm6vpaddd zmm3, zmm3, zmm7vpxord zmm12, zmm12, zmm0vpxord zmm13, zmm13, zmm1vpxord zmm14, zmm14, zmm2vpxord zmm15, zmm15, zmm3vprord zmm12, zmm12, 16vprord zmm13, zmm13, 16vprord zmm14, zmm14, 16vprord zmm15, zmm15, 16vpaddd zmm8, zmm8, zmm12vpaddd zmm9, zmm9, zmm13vpaddd zmm10, zmm10, zmm14vpaddd zmm11, zmm11, zmm15vpxord zmm4, zmm4, zmm8vpxord zmm5, zmm5, zmm9vpxord zmm6, zmm6, zmm10vpxord zmm7, zmm7, zmm11vprord zmm4, zmm4, 12vprord zmm5, zmm5, 12vprord zmm6, zmm6, 12vprord zmm7, zmm7, 12vpaddd zmm0, zmm0, zmm17vpaddd zmm1, zmm1, zmm19vpaddd zmm2, zmm2, zmm21vpaddd zmm3, zmm3, zmm23vpaddd zmm0, zmm0, zmm4vpaddd zmm1, zmm1, zmm5vpaddd zmm2, zmm2, zmm6vpaddd zmm3, zmm3, zmm7vpxord zmm12, zmm12, zmm0vpxord zmm13, zmm13, zmm1vpxord zmm14, zmm14, zmm2vpxord zmm15, zmm15, 
zmm3vprord zmm12, zmm12, 8vprord zmm13, zmm13, 8vprord zmm14, zmm14, 8vprord zmm15, zmm15, 8vpaddd zmm8, zmm8, zmm12vpaddd zmm9, zmm9, zmm13vpaddd zmm10, zmm10, zmm14vpaddd zmm11, zmm11, zmm15vpxord zmm4, zmm4, zmm8vpxord zmm5, zmm5, zmm9vpxord zmm6, zmm6, zmm10vpxord zmm7, zmm7, zmm11vprord zmm4, zmm4, 7vprord zmm5, zmm5, 7vprord zmm6, zmm6, 7vprord zmm7, zmm7, 7vpaddd zmm0, zmm0, zmm24vpaddd zmm1, zmm1, zmm26vpaddd zmm2, zmm2, zmm28vpaddd zmm3, zmm3, zmm30vpaddd zmm0, zmm0, zmm5vpaddd zmm1, zmm1, zmm6vpaddd zmm2, zmm2, zmm7vpaddd zmm3, zmm3, zmm4vpxord zmm15, zmm15, zmm0vpxord zmm12, zmm12, zmm1vpxord zmm13, zmm13, zmm2vpxord zmm14, zmm14, zmm3vprord zmm15, zmm15, 16vprord zmm12, zmm12, 16vprord zmm13, zmm13, 16vprord zmm14, zmm14, 16vpaddd zmm10, zmm10, zmm15vpaddd zmm11, zmm11, zmm12vpaddd zmm8, zmm8, zmm13vpaddd zmm9, zmm9, zmm14vpxord zmm5, zmm5, zmm10vpxord zmm6, zmm6, zmm11vpxord zmm7, zmm7, zmm8vpxord zmm4, zmm4, zmm9vprord zmm5, zmm5, 12vprord zmm6, zmm6, 12vprord zmm7, zmm7, 12vprord zmm4, zmm4, 12vpaddd zmm0, zmm0, zmm25vpaddd zmm1, zmm1, zmm27vpaddd zmm2, zmm2, zmm29vpaddd zmm3, zmm3, zmm31vpaddd zmm0, zmm0, zmm5vpaddd zmm1, zmm1, zmm6vpaddd zmm2, zmm2, zmm7vpaddd zmm3, zmm3, zmm4vpxord zmm15, zmm15, zmm0vpxord zmm12, zmm12, zmm1vpxord zmm13, zmm13, zmm2vpxord zmm14, zmm14, zmm3vprord zmm15, zmm15, 8vprord zmm12, zmm12, 8vprord zmm13, zmm13, 8vprord zmm14, zmm14, 8vpaddd zmm10, zmm10, zmm15vpaddd zmm11, zmm11, zmm12vpaddd zmm8, zmm8, zmm13vpaddd zmm9, zmm9, zmm14vpxord zmm5, zmm5, zmm10vpxord zmm6, zmm6, zmm11vpxord zmm7, zmm7, zmm8vpxord zmm4, zmm4, zmm9vprord zmm5, zmm5, 7vprord zmm6, zmm6, 7vprord zmm7, zmm7, 7vprord zmm4, zmm4, 7vpaddd zmm0, zmm0, zmm18vpaddd zmm1, zmm1, zmm19vpaddd zmm2, zmm2, zmm23vpaddd zmm3, zmm3, zmm20vpaddd zmm0, zmm0, zmm4vpaddd zmm1, zmm1, zmm5vpaddd zmm2, zmm2, zmm6vpaddd zmm3, zmm3, zmm7vpxord zmm12, zmm12, zmm0vpxord zmm13, zmm13, zmm1vpxord zmm14, zmm14, zmm2vpxord zmm15, zmm15, zmm3vprord zmm12, zmm12, 16vprord zmm13, 
zmm13, 16vprord zmm14, zmm14, 16vprord zmm15, zmm15, 16vpaddd zmm8, zmm8, zmm12vpaddd zmm9, zmm9, zmm13vpaddd zmm10, zmm10, zmm14vpaddd zmm11, zmm11, zmm15vpxord zmm4, zmm4, zmm8vpxord zmm5, zmm5, zmm9vpxord zmm6, zmm6, zmm10vpxord zmm7, zmm7, zmm11vprord zmm4, zmm4, 12vprord zmm5, zmm5, 12vprord zmm6, zmm6, 12vprord zmm7, zmm7, 12vpaddd zmm0, zmm0, zmm22vpaddd zmm1, zmm1, zmm26vpaddd zmm2, zmm2, zmm16vpaddd zmm3, zmm3, zmm29vpaddd zmm0, zmm0, zmm4vpaddd zmm1, zmm1, zmm5vpaddd zmm2, zmm2, zmm6vpaddd zmm3, zmm3, zmm7vpxord zmm12, zmm12, zmm0vpxord zmm13, zmm13, zmm1vpxord zmm14, zmm14, zmm2vpxord zmm15, zmm15, zmm3vprord zmm12, zmm12, 8vprord zmm13, zmm13, 8vprord zmm14, zmm14, 8vprord zmm15, zmm15, 8vpaddd zmm8, zmm8, zmm12vpaddd zmm9, zmm9, zmm13vpaddd zmm10, zmm10, zmm14vpaddd zmm11, zmm11, zmm15vpxord zmm4, zmm4, zmm8vpxord zmm5, zmm5, zmm9vpxord zmm6, zmm6, zmm10vpxord zmm7, zmm7, zmm11vprord zmm4, zmm4, 7vprord zmm5, zmm5, 7vprord zmm6, zmm6, 7vprord zmm7, zmm7, 7vpaddd zmm0, zmm0, zmm17vpaddd zmm1, zmm1, zmm28vpaddd zmm2, zmm2, zmm25vpaddd zmm3, zmm3, zmm31vpaddd zmm0, zmm0, zmm5vpaddd zmm1, zmm1, zmm6vpaddd zmm2, zmm2, zmm7vpaddd zmm3, zmm3, zmm4vpxord zmm15, zmm15, zmm0vpxord zmm12, zmm12, zmm1vpxord zmm13, zmm13, zmm2vpxord zmm14, zmm14, zmm3vprord zmm15, zmm15, 16vprord zmm12, zmm12, 16vprord zmm13, zmm13, 16vprord zmm14, zmm14, 16vpaddd zmm10, zmm10, zmm15vpaddd zmm11, zmm11, zmm12vpaddd zmm8, zmm8, zmm13vpaddd zmm9, zmm9, zmm14vpxord zmm5, zmm5, zmm10vpxord zmm6, zmm6, zmm11vpxord zmm7, zmm7, zmm8vpxord zmm4, zmm4, zmm9vprord zmm5, zmm5, 12vprord zmm6, zmm6, 12vprord zmm7, zmm7, 12vprord zmm4, zmm4, 12vpaddd zmm0, zmm0, zmm27vpaddd zmm1, zmm1, zmm21vpaddd zmm2, zmm2, zmm30vpaddd zmm3, zmm3, zmm24vpaddd zmm0, zmm0, zmm5vpaddd zmm1, zmm1, zmm6vpaddd zmm2, zmm2, zmm7vpaddd zmm3, zmm3, zmm4vpxord zmm15, zmm15, zmm0vpxord zmm12, zmm12, zmm1vpxord zmm13, zmm13, zmm2vpxord zmm14, zmm14, zmm3vprord zmm15, zmm15, 8vprord zmm12, zmm12, 8vprord zmm13, zmm13, 
8vprord zmm14, zmm14, 8vpaddd zmm10, zmm10, zmm15vpaddd zmm11, zmm11, zmm12vpaddd zmm8, zmm8, zmm13vpaddd zmm9, zmm9, zmm14vpxord zmm5, zmm5, zmm10vpxord zmm6, zmm6, zmm11vpxord zmm7, zmm7, zmm8vpxord zmm4, zmm4, zmm9vprord zmm5, zmm5, 7vprord zmm6, zmm6, 7vprord zmm7, zmm7, 7vprord zmm4, zmm4, 7vpaddd zmm0, zmm0, zmm19vpaddd zmm1, zmm1, zmm26vpaddd zmm2, zmm2, zmm29vpaddd zmm3, zmm3, zmm23vpaddd zmm0, zmm0, zmm4vpaddd zmm1, zmm1, zmm5vpaddd zmm2, zmm2, zmm6vpaddd zmm3, zmm3, zmm7vpxord zmm12, zmm12, zmm0vpxord zmm13, zmm13, zmm1vpxord zmm14, zmm14, zmm2vpxord zmm15, zmm15, zmm3vprord zmm12, zmm12, 16vprord zmm13, zmm13, 16vprord zmm14, zmm14, 16vprord zmm15, zmm15, 16vpaddd zmm8, zmm8, zmm12vpaddd zmm9, zmm9, zmm13vpaddd zmm10, zmm10, zmm14vpaddd zmm11, zmm11, zmm15vpxord zmm4, zmm4, zmm8vpxord zmm5, zmm5, zmm9vpxord zmm6, zmm6, zmm10vpxord zmm7, zmm7, zmm11vprord zmm4, zmm4, 12vprord zmm5, zmm5, 12vprord zmm6, zmm6, 12vprord zmm7, zmm7, 12vpaddd zmm0, zmm0, zmm20vpaddd zmm1, zmm1, zmm28vpaddd zmm2, zmm2, zmm18vpaddd zmm3, zmm3, zmm30vpaddd zmm0, zmm0, zmm4vpaddd zmm1, zmm1, zmm5vpaddd zmm2, zmm2, zmm6vpaddd zmm3, zmm3, zmm7vpxord zmm12, zmm12, zmm0vpxord zmm13, zmm13, zmm1vpxord zmm14, zmm14, zmm2vpxord zmm15, zmm15, zmm3vprord zmm12, zmm12, 8vprord zmm13, zmm13, 8vprord zmm14, zmm14, 8vprord zmm15, zmm15, 8vpaddd zmm8, zmm8, zmm12vpaddd zmm9, zmm9, zmm13vpaddd zmm10, zmm10, zmm14vpaddd zmm11, zmm11, zmm15vpxord zmm4, zmm4, zmm8vpxord zmm5, zmm5, zmm9vpxord zmm6, zmm6, zmm10vpxord zmm7, zmm7, zmm11vprord zmm4, zmm4, 7vprord zmm5, zmm5, 7vprord zmm6, zmm6, 7vprord zmm7, zmm7, 7vpaddd zmm0, zmm0, zmm22vpaddd zmm1, zmm1, zmm25vpaddd zmm2, zmm2, zmm27vpaddd zmm3, zmm3, zmm24vpaddd zmm0, zmm0, zmm5vpaddd zmm1, zmm1, zmm6vpaddd zmm2, zmm2, zmm7vpaddd zmm3, zmm3, zmm4vpxord zmm15, zmm15, zmm0vpxord zmm12, zmm12, zmm1vpxord zmm13, zmm13, zmm2vpxord zmm14, zmm14, zmm3vprord zmm15, zmm15, 16vprord zmm12, zmm12, 16vprord zmm13, zmm13, 16vprord zmm14, zmm14, 16vpaddd zmm10, 
zmm10, zmm15vpaddd zmm11, zmm11, zmm12vpaddd zmm8, zmm8, zmm13vpaddd zmm9, zmm9, zmm14vpxord zmm5, zmm5, zmm10vpxord zmm6, zmm6, zmm11vpxord zmm7, zmm7, zmm8vpxord zmm4, zmm4, zmm9vprord zmm5, zmm5, 12vprord zmm6, zmm6, 12vprord zmm7, zmm7, 12vprord zmm4, zmm4, 12vpaddd zmm0, zmm0, zmm21vpaddd zmm1, zmm1, zmm16vpaddd zmm2, zmm2, zmm31vpaddd zmm3, zmm3, zmm17vpaddd zmm0, zmm0, zmm5vpaddd zmm1, zmm1, zmm6vpaddd zmm2, zmm2, zmm7vpaddd zmm3, zmm3, zmm4vpxord zmm15, zmm15, zmm0vpxord zmm12, zmm12, zmm1vpxord zmm13, zmm13, zmm2vpxord zmm14, zmm14, zmm3vprord zmm15, zmm15, 8vprord zmm12, zmm12, 8vprord zmm13, zmm13, 8vprord zmm14, zmm14, 8vpaddd zmm10, zmm10, zmm15vpaddd zmm11, zmm11, zmm12vpaddd zmm8, zmm8, zmm13vpaddd zmm9, zmm9, zmm14vpxord zmm5, zmm5, zmm10vpxord zmm6, zmm6, zmm11vpxord zmm7, zmm7, zmm8vpxord zmm4, zmm4, zmm9vprord zmm5, zmm5, 7vprord zmm6, zmm6, 7vprord zmm7, zmm7, 7vprord zmm4, zmm4, 7vpaddd zmm0, zmm0, zmm26vpaddd zmm1, zmm1, zmm28vpaddd zmm2, zmm2, zmm30vpaddd zmm3, zmm3, zmm29vpaddd zmm0, zmm0, zmm4vpaddd zmm1, zmm1, zmm5vpaddd zmm2, zmm2, zmm6vpaddd zmm3, zmm3, zmm7vpxord zmm12, zmm12, zmm0vpxord zmm13, zmm13, zmm1vpxord zmm14, zmm14, zmm2vpxord zmm15, zmm15, zmm3vprord zmm12, zmm12, 16vprord zmm13, zmm13, 16vprord zmm14, zmm14, 16vprord zmm15, zmm15, 16vpaddd zmm8, zmm8, zmm12vpaddd zmm9, zmm9, zmm13vpaddd zmm10, zmm10, zmm14vpaddd zmm11, zmm11, zmm15vpxord zmm4, zmm4, zmm8vpxord zmm5, zmm5, zmm9vpxord zmm6, zmm6, zmm10vpxord zmm7, zmm7, zmm11vprord zmm4, zmm4, 12vprord zmm5, zmm5, 12vprord zmm6, zmm6, 12vprord zmm7, zmm7, 12vpaddd zmm0, zmm0, zmm23vpaddd zmm1, zmm1, zmm25vpaddd zmm2, zmm2, zmm19vpaddd zmm3, zmm3, zmm31vpaddd zmm0, zmm0, zmm4vpaddd zmm1, zmm1, zmm5vpaddd zmm2, zmm2, zmm6vpaddd zmm3, zmm3, zmm7vpxord zmm12, zmm12, zmm0vpxord zmm13, zmm13, zmm1vpxord zmm14, zmm14, zmm2vpxord zmm15, zmm15, zmm3vprord zmm12, zmm12, 8vprord zmm13, zmm13, 8vprord zmm14, zmm14, 8vprord zmm15, zmm15, 8vpaddd zmm8, zmm8, zmm12vpaddd zmm9, zmm9, 
zmm13vpaddd zmm10, zmm10, zmm14vpaddd zmm11, zmm11, zmm15vpxord zmm4, zmm4, zmm8vpxord zmm5, zmm5, zmm9vpxord zmm6, zmm6, zmm10vpxord zmm7, zmm7, zmm11vprord zmm4, zmm4, 7vprord zmm5, zmm5, 7vprord zmm6, zmm6, 7vprord zmm7, zmm7, 7vpaddd zmm0, zmm0, zmm20vpaddd zmm1, zmm1, zmm27vpaddd zmm2, zmm2, zmm21vpaddd zmm3, zmm3, zmm17vpaddd zmm0, zmm0, zmm5vpaddd zmm1, zmm1, zmm6vpaddd zmm2, zmm2, zmm7vpaddd zmm3, zmm3, zmm4vpxord zmm15, zmm15, zmm0vpxord zmm12, zmm12, zmm1vpxord zmm13, zmm13, zmm2vpxord zmm14, zmm14, zmm3vprord zmm15, zmm15, 16vprord zmm12, zmm12, 16vprord zmm13, zmm13, 16vprord zmm14, zmm14, 16vpaddd zmm10, zmm10, zmm15vpaddd zmm11, zmm11, zmm12vpaddd zmm8, zmm8, zmm13vpaddd zmm9, zmm9, zmm14vpxord zmm5, zmm5, zmm10vpxord zmm6, zmm6, zmm11vpxord zmm7, zmm7, zmm8vpxord zmm4, zmm4, zmm9vprord zmm5, zmm5, 12vprord zmm6, zmm6, 12vprord zmm7, zmm7, 12vprord zmm4, zmm4, 12vpaddd zmm0, zmm0, zmm16vpaddd zmm1, zmm1, zmm18vpaddd zmm2, zmm2, zmm24vpaddd zmm3, zmm3, zmm22vpaddd zmm0, zmm0, zmm5vpaddd zmm1, zmm1, zmm6vpaddd zmm2, zmm2, zmm7vpaddd zmm3, zmm3, zmm4vpxord zmm15, zmm15, zmm0vpxord zmm12, zmm12, zmm1vpxord zmm13, zmm13, zmm2vpxord zmm14, zmm14, zmm3vprord zmm15, zmm15, 8vprord zmm12, zmm12, 8vprord zmm13, zmm13, 8vprord zmm14, zmm14, 8vpaddd zmm10, zmm10, zmm15vpaddd zmm11, zmm11, zmm12vpaddd zmm8, zmm8, zmm13vpaddd zmm9, zmm9, zmm14vpxord zmm5, zmm5, zmm10vpxord zmm6, zmm6, zmm11vpxord zmm7, zmm7, zmm8vpxord zmm4, zmm4, zmm9vprord zmm5, zmm5, 7vprord zmm6, zmm6, 7vprord zmm7, zmm7, 7vprord zmm4, zmm4, 7vpaddd zmm0, zmm0, zmm28vpaddd zmm1, zmm1, zmm25vpaddd zmm2, zmm2, zmm31vpaddd zmm3, zmm3, zmm30vpaddd zmm0, zmm0, zmm4vpaddd zmm1, zmm1, zmm5vpaddd zmm2, zmm2, zmm6vpaddd zmm3, zmm3, zmm7vpxord zmm12, zmm12, zmm0vpxord zmm13, zmm13, zmm1vpxord zmm14, zmm14, zmm2vpxord zmm15, zmm15, zmm3vprord zmm12, zmm12, 16vprord zmm13, zmm13, 16vprord zmm14, zmm14, 16vprord zmm15, zmm15, 16vpaddd zmm8, zmm8, zmm12vpaddd zmm9, zmm9, zmm13vpaddd zmm10, zmm10, zmm14vpaddd 
zmm11, zmm11, zmm15vpxord zmm4, zmm4, zmm8vpxord zmm5, zmm5, zmm9vpxord zmm6, zmm6, zmm10vpxord zmm7, zmm7, zmm11vprord zmm4, zmm4, 12vprord zmm5, zmm5, 12vprord zmm6, zmm6, 12vprord zmm7, zmm7, 12vpaddd zmm0, zmm0, zmm29vpaddd zmm1, zmm1, zmm27vpaddd zmm2, zmm2, zmm26vpaddd zmm3, zmm3, zmm24vpaddd zmm0, zmm0, zmm4vpaddd zmm1, zmm1, zmm5vpaddd zmm2, zmm2, zmm6vpaddd zmm3, zmm3, zmm7vpxord zmm12, zmm12, zmm0vpxord zmm13, zmm13, zmm1vpxord zmm14, zmm14, zmm2vpxord zmm15, zmm15, zmm3vprord zmm12, zmm12, 8vprord zmm13, zmm13, 8vprord zmm14, zmm14, 8vprord zmm15, zmm15, 8vpaddd zmm8, zmm8, zmm12vpaddd zmm9, zmm9, zmm13vpaddd zmm10, zmm10, zmm14vpaddd zmm11, zmm11, zmm15vpxord zmm4, zmm4, zmm8vpxord zmm5, zmm5, zmm9vpxord zmm6, zmm6, zmm10vpxord zmm7, zmm7, zmm11vprord zmm4, zmm4, 7vprord zmm5, zmm5, 7vprord zmm6, zmm6, 7vprord zmm7, zmm7, 7vpaddd zmm0, zmm0, zmm23vpaddd zmm1, zmm1, zmm21vpaddd zmm2, zmm2, zmm16vpaddd zmm3, zmm3, zmm22vpaddd zmm0, zmm0, zmm5vpaddd zmm1, zmm1, zmm6vpaddd zmm2, zmm2, zmm7vpaddd zmm3, zmm3, zmm4vpxord zmm15, zmm15, zmm0vpxord zmm12, zmm12, zmm1vpxord zmm13, zmm13, zmm2vpxord zmm14, zmm14, zmm3vprord zmm15, zmm15, 16vprord zmm12, zmm12, 16vprord zmm13, zmm13, 16vprord zmm14, zmm14, 16vpaddd zmm10, zmm10, zmm15vpaddd zmm11, zmm11, zmm12vpaddd zmm8, zmm8, zmm13vpaddd zmm9, zmm9, zmm14vpxord zmm5, zmm5, zmm10vpxord zmm6, zmm6, zmm11vpxord zmm7, zmm7, zmm8vpxord zmm4, zmm4, zmm9vprord zmm5, zmm5, 12vprord zmm6, zmm6, 12vprord zmm7, zmm7, 12vprord zmm4, zmm4, 12vpaddd zmm0, zmm0, zmm18vpaddd zmm1, zmm1, zmm19vpaddd zmm2, zmm2, zmm17vpaddd zmm3, zmm3, zmm20vpaddd zmm0, zmm0, zmm5vpaddd zmm1, zmm1, zmm6vpaddd zmm2, zmm2, zmm7vpaddd zmm3, zmm3, zmm4vpxord zmm15, zmm15, zmm0vpxord zmm12, zmm12, zmm1vpxord zmm13, zmm13, zmm2vpxord zmm14, zmm14, zmm3vprord zmm15, zmm15, 8vprord zmm12, zmm12, 8vprord zmm13, zmm13, 8vprord zmm14, zmm14, 8vpaddd zmm10, zmm10, zmm15vpaddd zmm11, zmm11, zmm12vpaddd zmm8, zmm8, zmm13vpaddd zmm9, zmm9, zmm14vpxord zmm5, zmm5, 
zmm10vpxord zmm6, zmm6, zmm11vpxord zmm7, zmm7, zmm8vpxord zmm4, zmm4, zmm9vprord zmm5, zmm5, 7vprord zmm6, zmm6, 7vprord zmm7, zmm7, 7vprord zmm4, zmm4, 7vpaddd zmm0, zmm0, zmm25vpaddd zmm1, zmm1, zmm27vpaddd zmm2, zmm2, zmm24vpaddd zmm3, zmm3, zmm31vpaddd zmm0, zmm0, zmm4vpaddd zmm1, zmm1, zmm5vpaddd zmm2, zmm2, zmm6vpaddd zmm3, zmm3, zmm7vpxord zmm12, zmm12, zmm0vpxord zmm13, zmm13, zmm1vpxord zmm14, zmm14, zmm2vpxord zmm15, zmm15, zmm3vprord zmm12, zmm12, 16vprord zmm13, zmm13, 16vprord zmm14, zmm14, 16vprord zmm15, zmm15, 16vpaddd zmm8, zmm8, zmm12vpaddd zmm9, zmm9, zmm13vpaddd zmm10, zmm10, zmm14vpaddd zmm11, zmm11, zmm15vpxord zmm4, zmm4, zmm8vpxord zmm5, zmm5, zmm9vpxord zmm6, zmm6, zmm10vpxord zmm7, zmm7, zmm11vprord zmm4, zmm4, 12vprord zmm5, zmm5, 12vprord zmm6, zmm6, 12vprord zmm7, zmm7, 12vpaddd zmm0, zmm0, zmm30vpaddd zmm1, zmm1, zmm21vpaddd zmm2, zmm2, zmm28vpaddd zmm3, zmm3, zmm17vpaddd zmm0, zmm0, zmm4vpaddd zmm1, zmm1, zmm5vpaddd zmm2, zmm2, zmm6vpaddd zmm3, zmm3, zmm7vpxord zmm12, zmm12, zmm0vpxord zmm13, zmm13, zmm1vpxord zmm14, zmm14, zmm2vpxord zmm15, zmm15, zmm3vprord zmm12, zmm12, 8vprord zmm13, zmm13, 8vprord zmm14, zmm14, 8vprord zmm15, zmm15, 8vpaddd zmm8, zmm8, zmm12vpaddd zmm9, zmm9, zmm13vpaddd zmm10, zmm10, zmm14vpaddd zmm11, zmm11, zmm15vpxord zmm4, zmm4, zmm8vpxord zmm5, zmm5, zmm9vpxord zmm6, zmm6, zmm10vpxord zmm7, zmm7, zmm11vprord zmm4, zmm4, 7vprord zmm5, zmm5, 7vprord zmm6, zmm6, 7vprord zmm7, zmm7, 7vpaddd zmm0, zmm0, zmm29vpaddd zmm1, zmm1, zmm16vpaddd zmm2, zmm2, zmm18vpaddd zmm3, zmm3, zmm20vpaddd zmm0, zmm0, zmm5vpaddd zmm1, zmm1, zmm6vpaddd zmm2, zmm2, zmm7vpaddd zmm3, zmm3, zmm4vpxord zmm15, zmm15, zmm0vpxord zmm12, zmm12, zmm1vpxord zmm13, zmm13, zmm2vpxord zmm14, zmm14, zmm3vprord zmm15, zmm15, 16vprord zmm12, zmm12, 16vprord zmm13, zmm13, 16vprord zmm14, zmm14, 16vpaddd zmm10, zmm10, zmm15vpaddd zmm11, zmm11, zmm12vpaddd zmm8, zmm8, zmm13vpaddd zmm9, zmm9, zmm14vpxord zmm5, zmm5, zmm10vpxord zmm6, zmm6, zmm11vpxord 
zmm7, zmm7, zmm8vpxord zmm4, zmm4, zmm9vprord zmm5, zmm5, 12vprord zmm6, zmm6, 12vprord zmm7, zmm7, 12vprord zmm4, zmm4, 12vpaddd zmm0, zmm0, zmm19vpaddd zmm1, zmm1, zmm26vpaddd zmm2, zmm2, zmm22vpaddd zmm3, zmm3, zmm23vpaddd zmm0, zmm0, zmm5vpaddd zmm1, zmm1, zmm6vpaddd zmm2, zmm2, zmm7vpaddd zmm3, zmm3, zmm4vpxord zmm15, zmm15, zmm0vpxord zmm12, zmm12, zmm1vpxord zmm13, zmm13, zmm2vpxord zmm14, zmm14, zmm3vprord zmm15, zmm15, 8vprord zmm12, zmm12, 8vprord zmm13, zmm13, 8vprord zmm14, zmm14, 8vpaddd zmm10, zmm10, zmm15vpaddd zmm11, zmm11, zmm12vpaddd zmm8, zmm8, zmm13vpaddd zmm9, zmm9, zmm14vpxord zmm5, zmm5, zmm10vpxord zmm6, zmm6, zmm11vpxord zmm7, zmm7, zmm8vpxord zmm4, zmm4, zmm9vprord zmm5, zmm5, 7vprord zmm6, zmm6, 7vprord zmm7, zmm7, 7vprord zmm4, zmm4, 7vpaddd zmm0, zmm0, zmm27vpaddd zmm1, zmm1, zmm21vpaddd zmm2, zmm2, zmm17vpaddd zmm3, zmm3, zmm24vpaddd zmm0, zmm0, zmm4vpaddd zmm1, zmm1, zmm5vpaddd zmm2, zmm2, zmm6vpaddd zmm3, zmm3, zmm7vpxord zmm12, zmm12, zmm0vpxord zmm13, zmm13, zmm1vpxord zmm14, zmm14, zmm2vpxord zmm15, zmm15, zmm3vprord zmm12, zmm12, 16vprord zmm13, zmm13, 16vprord zmm14, zmm14, 16vprord zmm15, zmm15, 16vpaddd zmm8, zmm8, zmm12vpaddd zmm9, zmm9, zmm13vpaddd zmm10, zmm10, zmm14vpaddd zmm11, zmm11, zmm15vpxord zmm4, zmm4, zmm8vpxord zmm5, zmm5, zmm9vpxord zmm6, zmm6, zmm10vpxord zmm7, zmm7, zmm11vprord zmm4, zmm4, 12vprord zmm5, zmm5, 12vprord zmm6, zmm6, 12vprord zmm7, zmm7, 12vpaddd zmm0, zmm0, zmm31vpaddd zmm1, zmm1, zmm16vpaddd zmm2, zmm2, zmm25vpaddd zmm3, zmm3, zmm22vpaddd zmm0, zmm0, zmm4vpaddd zmm1, zmm1, zmm5vpaddd zmm2, zmm2, zmm6vpaddd zmm3, zmm3, zmm7vpxord zmm12, zmm12, zmm0vpxord zmm13, zmm13, zmm1vpxord zmm14, zmm14, zmm2vpxord zmm15, zmm15, zmm3vprord zmm12, zmm12, 8vprord zmm13, zmm13, 8vprord zmm14, zmm14, 8vprord zmm15, zmm15, 8vpaddd zmm8, zmm8, zmm12vpaddd zmm9, zmm9, zmm13vpaddd zmm10, zmm10, zmm14vpaddd zmm11, zmm11, zmm15vpxord zmm4, zmm4, zmm8vpxord zmm5, zmm5, zmm9vpxord zmm6, zmm6, zmm10vpxord zmm7, zmm7, 
zmm11vprord zmm4, zmm4, 7vprord zmm5, zmm5, 7vprord zmm6, zmm6, 7vprord zmm7, zmm7, 7vpaddd zmm0, zmm0, zmm30vpaddd zmm1, zmm1, zmm18vpaddd zmm2, zmm2, zmm19vpaddd zmm3, zmm3, zmm23vpaddd zmm0, zmm0, zmm5vpaddd zmm1, zmm1, zmm6vpaddd zmm2, zmm2, zmm7vpaddd zmm3, zmm3, zmm4vpxord zmm15, zmm15, zmm0vpxord zmm12, zmm12, zmm1vpxord zmm13, zmm13, zmm2vpxord zmm14, zmm14, zmm3vprord zmm15, zmm15, 16vprord zmm12, zmm12, 16vprord zmm13, zmm13, 16vprord zmm14, zmm14, 16vpaddd zmm10, zmm10, zmm15vpaddd zmm11, zmm11, zmm12vpaddd zmm8, zmm8, zmm13vpaddd zmm9, zmm9, zmm14vpxord zmm5, zmm5, zmm10vpxord zmm6, zmm6, zmm11vpxord zmm7, zmm7, zmm8vpxord zmm4, zmm4, zmm9vprord zmm5, zmm5, 12vprord zmm6, zmm6, 12vprord zmm7, zmm7, 12vprord zmm4, zmm4, 12vpaddd zmm0, zmm0, zmm26vpaddd zmm1, zmm1, zmm28vpaddd zmm2, zmm2, zmm20vpaddd zmm3, zmm3, zmm29vpaddd zmm0, zmm0, zmm5vpaddd zmm1, zmm1, zmm6vpaddd zmm2, zmm2, zmm7vpaddd zmm3, zmm3, zmm4vpxord zmm15, zmm15, zmm0vpxord zmm12, zmm12, zmm1vpxord zmm13, zmm13, zmm2vpxord zmm14, zmm14, zmm3vprord zmm15, zmm15, 8vprord zmm12, zmm12, 8vprord zmm13, zmm13, 8vprord zmm14, zmm14, 8vpaddd zmm10, zmm10, zmm15vpaddd zmm11, zmm11, zmm12vpaddd zmm8, zmm8, zmm13vpaddd zmm9, zmm9, zmm14vpxord zmm5, zmm5, zmm10vpxord zmm6, zmm6, zmm11vpxord zmm7, zmm7, zmm8vpxord zmm4, zmm4, zmm9vprord zmm5, zmm5, 7vprord zmm6, zmm6, 7vprord zmm7, zmm7, 7vprord zmm4, zmm4, 7vpxord zmm0, zmm0, zmm8vpxord zmm1, zmm1, zmm9vpxord zmm2, zmm2, zmm10vpxord zmm3, zmm3, zmm11vpxord zmm4, zmm4, zmm12vpxord zmm5, zmm5, zmm13vpxord zmm6, zmm6, zmm14vpxord zmm7, zmm7, zmm15movzx eax, byte ptr [rbp+0x38]jne 9bmov rbx, qword ptr [rbp+0x50]vpunpckldq zmm16, zmm0, zmm1vpunpckhdq zmm17, zmm0, zmm1vpunpckldq zmm18, zmm2, zmm3vpunpckhdq zmm19, zmm2, zmm3vpunpckldq zmm20, zmm4, zmm5vpunpckhdq zmm21, zmm4, zmm5vpunpckldq zmm22, zmm6, zmm7vpunpckhdq zmm23, zmm6, zmm7vpunpcklqdq zmm0, zmm16, zmm18vpunpckhqdq zmm1, zmm16, zmm18vpunpcklqdq zmm2, zmm17, zmm19vpunpckhqdq zmm3, zmm17, 
zmm19vpunpcklqdq zmm4, zmm20, zmm22vpunpckhqdq zmm5, zmm20, zmm22vpunpcklqdq zmm6, zmm21, zmm23vpunpckhqdq zmm7, zmm21, zmm23vshufi32x4 zmm16, zmm0, zmm4, 0x88vshufi32x4 zmm17, zmm1, zmm5, 0x88vshufi32x4 zmm18, zmm2, zmm6, 0x88vshufi32x4 zmm19, zmm3, zmm7, 0x88vshufi32x4 zmm20, zmm0, zmm4, 0xDDvshufi32x4 zmm21, zmm1, zmm5, 0xDDvshufi32x4 zmm22, zmm2, zmm6, 0xDDvshufi32x4 zmm23, zmm3, zmm7, 0xDDvshufi32x4 zmm0, zmm16, zmm17, 0x88vshufi32x4 zmm1, zmm18, zmm19, 0x88vshufi32x4 zmm2, zmm20, zmm21, 0x88vshufi32x4 zmm3, zmm22, zmm23, 0x88vshufi32x4 zmm4, zmm16, zmm17, 0xDDvshufi32x4 zmm5, zmm18, zmm19, 0xDDvshufi32x4 zmm6, zmm20, zmm21, 0xDDvshufi32x4 zmm7, zmm22, zmm23, 0xDDvmovdqu32 zmmword ptr [rbx], zmm0vmovdqu32 zmmword ptr [rbx+0x1*0x40], zmm1vmovdqu32 zmmword ptr [rbx+0x2*0x40], zmm2vmovdqu32 zmmword ptr [rbx+0x3*0x40], zmm3vmovdqu32 zmmword ptr [rbx+0x4*0x40], zmm4vmovdqu32 zmmword ptr [rbx+0x5*0x40], zmm5vmovdqu32 zmmword ptr [rbx+0x6*0x40], zmm6vmovdqu32 zmmword ptr [rbx+0x7*0x40], zmm7vmovdqa32 zmm0, zmmword ptr [rsp]vmovdqa32 zmm1, zmmword ptr [rsp+0x1*0x40]vmovdqa32 zmm2, zmm0vpaddd zmm2{k1}, zmm0, dword ptr [ADD16+rip] {1to16}vpcmpltud k2, zmm2, zmm0vpaddd zmm1 {k2}, zmm1, dword ptr [ADD1+rip] {1to16}vmovdqa32 zmmword ptr [rsp], zmm2vmovdqa32 zmmword ptr [rsp+0x1*0x40], zmm1add rdi, 128add rbx, 512mov qword ptr [rbp+0x50], rbxsub rsi, 16cmp rsi, 16jnc 2btest rsi, rsijnz 3f4:vzerouppermov rsp, rbppop rbppop rbxpop r12pop r13pop r14pop r15ret.p2align 63:test esi, 0x8je 3fvpbroadcastd ymm0, dword ptr [rcx]vpbroadcastd ymm1, dword ptr [rcx+0x4]vpbroadcastd ymm2, dword ptr [rcx+0x8]vpbroadcastd ymm3, dword ptr [rcx+0xC]vpbroadcastd ymm4, dword ptr [rcx+0x10]vpbroadcastd ymm5, dword ptr [rcx+0x14]vpbroadcastd ymm6, dword ptr [rcx+0x18]vpbroadcastd ymm7, dword ptr [rcx+0x1C]mov r8, qword ptr [rdi]mov r9, qword ptr [rdi+0x8]mov r10, qword ptr [rdi+0x10]mov r11, qword ptr [rdi+0x18]mov r12, qword ptr [rdi+0x20]mov r13, qword ptr [rdi+0x28]mov r14, qword ptr 
[rdi+0x30]mov r15, qword ptr [rdi+0x38]movzx eax, byte ptr [rbp+0x38]movzx ebx, byte ptr [rbp+0x40]or eax, ebxxor edx, edx2:movzx ebx, byte ptr [rbp+0x48]or ebx, eaxadd rdx, 64cmp rdx, qword ptr [rsp+0x80]cmove eax, ebxmov dword ptr [rsp+0x88], eaxvmovups xmm8, xmmword ptr [r8+rdx-0x40]vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x40], 0x01vmovups xmm9, xmmword ptr [r9+rdx-0x40]vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x40], 0x01vunpcklpd ymm12, ymm8, ymm9vunpckhpd ymm13, ymm8, ymm9vmovups xmm10, xmmword ptr [r10+rdx-0x40]vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x40], 0x01vmovups xmm11, xmmword ptr [r11+rdx-0x40]vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x40], 0x01vunpcklpd ymm14, ymm10, ymm11vunpckhpd ymm15, ymm10, ymm11vshufps ymm16, ymm12, ymm14, 136vshufps ymm17, ymm12, ymm14, 221vshufps ymm18, ymm13, ymm15, 136vshufps ymm19, ymm13, ymm15, 221vmovups xmm8, xmmword ptr [r8+rdx-0x30]vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x30], 0x01vmovups xmm9, xmmword ptr [r9+rdx-0x30]vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x30], 0x01vunpcklpd ymm12, ymm8, ymm9vunpckhpd ymm13, ymm8, ymm9vmovups xmm10, xmmword ptr [r10+rdx-0x30]vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x30], 0x01vmovups xmm11, xmmword ptr [r11+rdx-0x30]vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x30], 0x01vunpcklpd ymm14, ymm10, ymm11vunpckhpd ymm15, ymm10, ymm11vshufps ymm20, ymm12, ymm14, 136vshufps ymm21, ymm12, ymm14, 221vshufps ymm22, ymm13, ymm15, 136vshufps ymm23, ymm13, ymm15, 221vmovups xmm8, xmmword ptr [r8+rdx-0x20]vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x20], 0x01vmovups xmm9, xmmword ptr [r9+rdx-0x20]vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x20], 0x01vunpcklpd ymm12, ymm8, ymm9vunpckhpd ymm13, ymm8, ymm9vmovups xmm10, xmmword ptr [r10+rdx-0x20]vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x20], 0x01vmovups xmm11, xmmword ptr [r11+rdx-0x20]vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x20], 0x01vunpcklpd ymm14, ymm10, ymm11vunpckhpd ymm15, ymm10, 
ymm11vshufps ymm24, ymm12, ymm14, 136vshufps ymm25, ymm12, ymm14, 221vshufps ymm26, ymm13, ymm15, 136vshufps ymm27, ymm13, ymm15, 221vmovups xmm8, xmmword ptr [r8+rdx-0x10]vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x10], 0x01vmovups xmm9, xmmword ptr [r9+rdx-0x10]vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x10], 0x01vunpcklpd ymm12, ymm8, ymm9vunpckhpd ymm13, ymm8, ymm9vmovups xmm10, xmmword ptr [r10+rdx-0x10]vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x10], 0x01vmovups xmm11, xmmword ptr [r11+rdx-0x10]vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x10], 0x01vunpcklpd ymm14, ymm10, ymm11vunpckhpd ymm15, ymm10, ymm11vshufps ymm28, ymm12, ymm14, 136vshufps ymm29, ymm12, ymm14, 221vshufps ymm30, ymm13, ymm15, 136vshufps ymm31, ymm13, ymm15, 221vpbroadcastd ymm8, dword ptr [BLAKE3_IV_0+rip]vpbroadcastd ymm9, dword ptr [BLAKE3_IV_1+rip]vpbroadcastd ymm10, dword ptr [BLAKE3_IV_2+rip]vpbroadcastd ymm11, dword ptr [BLAKE3_IV_3+rip]vmovdqa ymm12, ymmword ptr [rsp]vmovdqa ymm13, ymmword ptr [rsp+0x40]vpbroadcastd ymm14, dword ptr [BLAKE3_BLOCK_LEN+rip]vpbroadcastd ymm15, dword ptr [rsp+0x88]vpaddd ymm0, ymm0, ymm16vpaddd ymm1, ymm1, ymm18vpaddd ymm2, ymm2, ymm20vpaddd ymm3, ymm3, ymm22vpaddd ymm0, ymm0, ymm4vpaddd ymm1, ymm1, ymm5vpaddd ymm2, ymm2, ymm6vpaddd ymm3, ymm3, ymm7vpxord ymm12, ymm12, ymm0vpxord ymm13, ymm13, ymm1vpxord ymm14, ymm14, ymm2vpxord ymm15, ymm15, ymm3vprord ymm12, ymm12, 16vprord ymm13, ymm13, 16vprord ymm14, ymm14, 16vprord ymm15, ymm15, 16vpaddd ymm8, ymm8, ymm12vpaddd ymm9, ymm9, ymm13vpaddd ymm10, ymm10, ymm14vpaddd ymm11, ymm11, ymm15vpxord ymm4, ymm4, ymm8vpxord ymm5, ymm5, ymm9vpxord ymm6, ymm6, ymm10vpxord ymm7, ymm7, ymm11vprord ymm4, ymm4, 12vprord ymm5, ymm5, 12vprord ymm6, ymm6, 12vprord ymm7, ymm7, 12vpaddd ymm0, ymm0, ymm17vpaddd ymm1, ymm1, ymm19vpaddd ymm2, ymm2, ymm21vpaddd ymm3, ymm3, ymm23vpaddd ymm0, ymm0, ymm4vpaddd ymm1, ymm1, ymm5vpaddd ymm2, ymm2, ymm6vpaddd ymm3, ymm3, ymm7vpxord ymm12, ymm12, ymm0vpxord ymm13, ymm13, 
ymm1vpxord ymm14, ymm14, ymm2vpxord ymm15, ymm15, ymm3vprord ymm12, ymm12, 8vprord ymm13, ymm13, 8vprord ymm14, ymm14, 8vprord ymm15, ymm15, 8vpaddd ymm8, ymm8, ymm12vpaddd ymm9, ymm9, ymm13vpaddd ymm10, ymm10, ymm14vpaddd ymm11, ymm11, ymm15vpxord ymm4, ymm4, ymm8vpxord ymm5, ymm5, ymm9vpxord ymm6, ymm6, ymm10vpxord ymm7, ymm7, ymm11vprord ymm4, ymm4, 7vprord ymm5, ymm5, 7vprord ymm6, ymm6, 7vprord ymm7, ymm7, 7vpaddd ymm0, ymm0, ymm24vpaddd ymm1, ymm1, ymm26vpaddd ymm2, ymm2, ymm28vpaddd ymm3, ymm3, ymm30vpaddd ymm0, ymm0, ymm5vpaddd ymm1, ymm1, ymm6vpaddd ymm2, ymm2, ymm7vpaddd ymm3, ymm3, ymm4vpxord ymm15, ymm15, ymm0vpxord ymm12, ymm12, ymm1vpxord ymm13, ymm13, ymm2vpxord ymm14, ymm14, ymm3vprord ymm15, ymm15, 16vprord ymm12, ymm12, 16vprord ymm13, ymm13, 16vprord ymm14, ymm14, 16vpaddd ymm10, ymm10, ymm15vpaddd ymm11, ymm11, ymm12vpaddd ymm8, ymm8, ymm13vpaddd ymm9, ymm9, ymm14vpxord ymm5, ymm5, ymm10vpxord ymm6, ymm6, ymm11vpxord ymm7, ymm7, ymm8vpxord ymm4, ymm4, ymm9vprord ymm5, ymm5, 12vprord ymm6, ymm6, 12vprord ymm7, ymm7, 12vprord ymm4, ymm4, 12vpaddd ymm0, ymm0, ymm25vpaddd ymm1, ymm1, ymm27vpaddd ymm2, ymm2, ymm29vpaddd ymm3, ymm3, ymm31vpaddd ymm0, ymm0, ymm5vpaddd ymm1, ymm1, ymm6vpaddd ymm2, ymm2, ymm7vpaddd ymm3, ymm3, ymm4vpxord ymm15, ymm15, ymm0vpxord ymm12, ymm12, ymm1vpxord ymm13, ymm13, ymm2vpxord ymm14, ymm14, ymm3vprord ymm15, ymm15, 8vprord ymm12, ymm12, 8vprord ymm13, ymm13, 8vprord ymm14, ymm14, 8vpaddd ymm10, ymm10, ymm15vpaddd ymm11, ymm11, ymm12vpaddd ymm8, ymm8, ymm13vpaddd ymm9, ymm9, ymm14vpxord ymm5, ymm5, ymm10vpxord ymm6, ymm6, ymm11vpxord ymm7, ymm7, ymm8vpxord ymm4, ymm4, ymm9vprord ymm5, ymm5, 7vprord ymm6, ymm6, 7vprord ymm7, ymm7, 7vprord ymm4, ymm4, 7vpaddd ymm0, ymm0, ymm18vpaddd ymm1, ymm1, ymm19vpaddd ymm2, ymm2, ymm23vpaddd ymm3, ymm3, ymm20vpaddd ymm0, ymm0, ymm4vpaddd ymm1, ymm1, ymm5vpaddd ymm2, ymm2, ymm6vpaddd ymm3, ymm3, ymm7vpxord ymm12, ymm12, ymm0vpxord ymm13, ymm13, ymm1vpxord ymm14, ymm14, ymm2vpxord 
ymm15, ymm15, ymm3vprord ymm12, ymm12, 16vprord ymm13, ymm13, 16vprord ymm14, ymm14, 16vprord ymm15, ymm15, 16vpaddd ymm8, ymm8, ymm12vpaddd ymm9, ymm9, ymm13vpaddd ymm10, ymm10, ymm14vpaddd ymm11, ymm11, ymm15vpxord ymm4, ymm4, ymm8vpxord ymm5, ymm5, ymm9vpxord ymm6, ymm6, ymm10vpxord ymm7, ymm7, ymm11vprord ymm4, ymm4, 12vprord ymm5, ymm5, 12vprord ymm6, ymm6, 12vprord ymm7, ymm7, 12vpaddd ymm0, ymm0, ymm22vpaddd ymm1, ymm1, ymm26vpaddd ymm2, ymm2, ymm16vpaddd ymm3, ymm3, ymm29vpaddd ymm0, ymm0, ymm4vpaddd ymm1, ymm1, ymm5vpaddd ymm2, ymm2, ymm6vpaddd ymm3, ymm3, ymm7vpxord ymm12, ymm12, ymm0vpxord ymm13, ymm13, ymm1vpxord ymm14, ymm14, ymm2vpxord ymm15, ymm15, ymm3vprord ymm12, ymm12, 8vprord ymm13, ymm13, 8vprord ymm14, ymm14, 8vprord ymm15, ymm15, 8vpaddd ymm8, ymm8, ymm12vpaddd ymm9, ymm9, ymm13vpaddd ymm10, ymm10, ymm14vpaddd ymm11, ymm11, ymm15vpxord ymm4, ymm4, ymm8vpxord ymm5, ymm5, ymm9vpxord ymm6, ymm6, ymm10vpxord ymm7, ymm7, ymm11vprord ymm4, ymm4, 7vprord ymm5, ymm5, 7vprord ymm6, ymm6, 7vprord ymm7, ymm7, 7vpaddd ymm0, ymm0, ymm17vpaddd ymm1, ymm1, ymm28vpaddd ymm2, ymm2, ymm25vpaddd ymm3, ymm3, ymm31vpaddd ymm0, ymm0, ymm5vpaddd ymm1, ymm1, ymm6vpaddd ymm2, ymm2, ymm7vpaddd ymm3, ymm3, ymm4vpxord ymm15, ymm15, ymm0vpxord ymm12, ymm12, ymm1vpxord ymm13, ymm13, ymm2vpxord ymm14, ymm14, ymm3vprord ymm15, ymm15, 16vprord ymm12, ymm12, 16vprord ymm13, ymm13, 16vprord ymm14, ymm14, 16vpaddd ymm10, ymm10, ymm15vpaddd ymm11, ymm11, ymm12vpaddd ymm8, ymm8, ymm13vpaddd ymm9, ymm9, ymm14vpxord ymm5, ymm5, ymm10vpxord ymm6, ymm6, ymm11vpxord ymm7, ymm7, ymm8vpxord ymm4, ymm4, ymm9vprord ymm5, ymm5, 12vprord ymm6, ymm6, 12vprord ymm7, ymm7, 12vprord ymm4, ymm4, 12vpaddd ymm0, ymm0, ymm27vpaddd ymm1, ymm1, ymm21vpaddd ymm2, ymm2, ymm30vpaddd ymm3, ymm3, ymm24vpaddd ymm0, ymm0, ymm5vpaddd ymm1, ymm1, ymm6vpaddd ymm2, ymm2, ymm7vpaddd ymm3, ymm3, ymm4vpxord ymm15, ymm15, ymm0vpxord ymm12, ymm12, ymm1vpxord ymm13, ymm13, ymm2vpxord ymm14, ymm14, ymm3vprord ymm15, 
ymm15, 8vprord ymm12, ymm12, 8vprord ymm13, ymm13, 8vprord ymm14, ymm14, 8vpaddd ymm10, ymm10, ymm15vpaddd ymm11, ymm11, ymm12vpaddd ymm8, ymm8, ymm13vpaddd ymm9, ymm9, ymm14vpxord ymm5, ymm5, ymm10vpxord ymm6, ymm6, ymm11vpxord ymm7, ymm7, ymm8vpxord ymm4, ymm4, ymm9vprord ymm5, ymm5, 7vprord ymm6, ymm6, 7vprord ymm7, ymm7, 7vprord ymm4, ymm4, 7vpaddd ymm0, ymm0, ymm19vpaddd ymm1, ymm1, ymm26vpaddd ymm2, ymm2, ymm29vpaddd ymm3, ymm3, ymm23vpaddd ymm0, ymm0, ymm4vpaddd ymm1, ymm1, ymm5vpaddd ymm2, ymm2, ymm6vpaddd ymm3, ymm3, ymm7vpxord ymm12, ymm12, ymm0vpxord ymm13, ymm13, ymm1vpxord ymm14, ymm14, ymm2vpxord ymm15, ymm15, ymm3vprord ymm12, ymm12, 16vprord ymm13, ymm13, 16vprord ymm14, ymm14, 16vprord ymm15, ymm15, 16vpaddd ymm8, ymm8, ymm12vpaddd ymm9, ymm9, ymm13vpaddd ymm10, ymm10, ymm14vpaddd ymm11, ymm11, ymm15vpxord ymm4, ymm4, ymm8vpxord ymm5, ymm5, ymm9vpxord ymm6, ymm6, ymm10vpxord ymm7, ymm7, ymm11vprord ymm4, ymm4, 12vprord ymm5, ymm5, 12vprord ymm6, ymm6, 12vprord ymm7, ymm7, 12vpaddd ymm0, ymm0, ymm20vpaddd ymm1, ymm1, ymm28vpaddd ymm2, ymm2, ymm18vpaddd ymm3, ymm3, ymm30vpaddd ymm0, ymm0, ymm4vpaddd ymm1, ymm1, ymm5vpaddd ymm2, ymm2, ymm6vpaddd ymm3, ymm3, ymm7vpxord ymm12, ymm12, ymm0vpxord ymm13, ymm13, ymm1vpxord ymm14, ymm14, ymm2vpxord ymm15, ymm15, ymm3vprord ymm12, ymm12, 8vprord ymm13, ymm13, 8vprord ymm14, ymm14, 8vprord ymm15, ymm15, 8vpaddd ymm8, ymm8, ymm12vpaddd ymm9, ymm9, ymm13vpaddd ymm10, ymm10, ymm14vpaddd ymm11, ymm11, ymm15vpxord ymm4, ymm4, ymm8vpxord ymm5, ymm5, ymm9vpxord ymm6, ymm6, ymm10vpxord ymm7, ymm7, ymm11vprord ymm4, ymm4, 7vprord ymm5, ymm5, 7vprord ymm6, ymm6, 7vprord ymm7, ymm7, 7vpaddd ymm0, ymm0, ymm22vpaddd ymm1, ymm1, ymm25vpaddd ymm2, ymm2, ymm27vpaddd ymm3, ymm3, ymm24vpaddd ymm0, ymm0, ymm5vpaddd ymm1, ymm1, ymm6vpaddd ymm2, ymm2, ymm7vpaddd ymm3, ymm3, ymm4vpxord ymm15, ymm15, ymm0vpxord ymm12, ymm12, ymm1vpxord ymm13, ymm13, ymm2vpxord ymm14, ymm14, ymm3vprord ymm15, ymm15, 16vprord ymm12, ymm12, 16vprord 
ymm13, ymm13, 16vprord ymm14, ymm14, 16vpaddd ymm10, ymm10, ymm15vpaddd ymm11, ymm11, ymm12vpaddd ymm8, ymm8, ymm13vpaddd ymm9, ymm9, ymm14vpxord ymm5, ymm5, ymm10vpxord ymm6, ymm6, ymm11vpxord ymm7, ymm7, ymm8vpxord ymm4, ymm4, ymm9vprord ymm5, ymm5, 12vprord ymm6, ymm6, 12vprord ymm7, ymm7, 12vprord ymm4, ymm4, 12vpaddd ymm0, ymm0, ymm21vpaddd ymm1, ymm1, ymm16vpaddd ymm2, ymm2, ymm31vpaddd ymm3, ymm3, ymm17vpaddd ymm0, ymm0, ymm5vpaddd ymm1, ymm1, ymm6vpaddd ymm2, ymm2, ymm7vpaddd ymm3, ymm3, ymm4vpxord ymm15, ymm15, ymm0vpxord ymm12, ymm12, ymm1vpxord ymm13, ymm13, ymm2vpxord ymm14, ymm14, ymm3vprord ymm15, ymm15, 8vprord ymm12, ymm12, 8vprord ymm13, ymm13, 8vprord ymm14, ymm14, 8vpaddd ymm10, ymm10, ymm15vpaddd ymm11, ymm11, ymm12vpaddd ymm8, ymm8, ymm13vpaddd ymm9, ymm9, ymm14vpxord ymm5, ymm5, ymm10vpxord ymm6, ymm6, ymm11vpxord ymm7, ymm7, ymm8vpxord ymm4, ymm4, ymm9vprord ymm5, ymm5, 7vprord ymm6, ymm6, 7vprord ymm7, ymm7, 7vprord ymm4, ymm4, 7vpaddd ymm0, ymm0, ymm26vpaddd ymm1, ymm1, ymm28vpaddd ymm2, ymm2, ymm30vpaddd ymm3, ymm3, ymm29vpaddd ymm0, ymm0, ymm4vpaddd ymm1, ymm1, ymm5vpaddd ymm2, ymm2, ymm6vpaddd ymm3, ymm3, ymm7vpxord ymm12, ymm12, ymm0vpxord ymm13, ymm13, ymm1vpxord ymm14, ymm14, ymm2vpxord ymm15, ymm15, ymm3vprord ymm12, ymm12, 16vprord ymm13, ymm13, 16vprord ymm14, ymm14, 16vprord ymm15, ymm15, 16vpaddd ymm8, ymm8, ymm12vpaddd ymm9, ymm9, ymm13vpaddd ymm10, ymm10, ymm14vpaddd ymm11, ymm11, ymm15vpxord ymm4, ymm4, ymm8vpxord ymm5, ymm5, ymm9vpxord ymm6, ymm6, ymm10vpxord ymm7, ymm7, ymm11vprord ymm4, ymm4, 12vprord ymm5, ymm5, 12vprord ymm6, ymm6, 12vprord ymm7, ymm7, 12vpaddd ymm0, ymm0, ymm23vpaddd ymm1, ymm1, ymm25vpaddd ymm2, ymm2, ymm19vpaddd ymm3, ymm3, ymm31vpaddd ymm0, ymm0, ymm4vpaddd ymm1, ymm1, ymm5vpaddd ymm2, ymm2, ymm6vpaddd ymm3, ymm3, ymm7vpxord ymm12, ymm12, ymm0vpxord ymm13, ymm13, ymm1vpxord ymm14, ymm14, ymm2vpxord ymm15, ymm15, ymm3vprord ymm12, ymm12, 8vprord ymm13, ymm13, 8vprord ymm14, ymm14, 8vprord ymm15, ymm15, 
8vpaddd ymm8, ymm8, ymm12vpaddd ymm9, ymm9, ymm13vpaddd ymm10, ymm10, ymm14vpaddd ymm11, ymm11, ymm15vpxord ymm4, ymm4, ymm8vpxord ymm5, ymm5, ymm9vpxord ymm6, ymm6, ymm10vpxord ymm7, ymm7, ymm11vprord ymm4, ymm4, 7vprord ymm5, ymm5, 7vprord ymm6, ymm6, 7vprord ymm7, ymm7, 7vpaddd ymm0, ymm0, ymm20vpaddd ymm1, ymm1, ymm27vpaddd ymm2, ymm2, ymm21vpaddd ymm3, ymm3, ymm17vpaddd ymm0, ymm0, ymm5vpaddd ymm1, ymm1, ymm6vpaddd ymm2, ymm2, ymm7vpaddd ymm3, ymm3, ymm4vpxord ymm15, ymm15, ymm0vpxord ymm12, ymm12, ymm1vpxord ymm13, ymm13, ymm2vpxord ymm14, ymm14, ymm3vprord ymm15, ymm15, 16vprord ymm12, ymm12, 16vprord ymm13, ymm13, 16vprord ymm14, ymm14, 16vpaddd ymm10, ymm10, ymm15vpaddd ymm11, ymm11, ymm12vpaddd ymm8, ymm8, ymm13vpaddd ymm9, ymm9, ymm14vpxord ymm5, ymm5, ymm10vpxord ymm6, ymm6, ymm11vpxord ymm7, ymm7, ymm8vpxord ymm4, ymm4, ymm9vprord ymm5, ymm5, 12vprord ymm6, ymm6, 12vprord ymm7, ymm7, 12vprord ymm4, ymm4, 12vpaddd ymm0, ymm0, ymm16vpaddd ymm1, ymm1, ymm18vpaddd ymm2, ymm2, ymm24vpaddd ymm3, ymm3, ymm22vpaddd ymm0, ymm0, ymm5vpaddd ymm1, ymm1, ymm6vpaddd ymm2, ymm2, ymm7vpaddd ymm3, ymm3, ymm4vpxord ymm15, ymm15, ymm0vpxord ymm12, ymm12, ymm1vpxord ymm13, ymm13, ymm2vpxord ymm14, ymm14, ymm3vprord ymm15, ymm15, 8vprord ymm12, ymm12, 8vprord ymm13, ymm13, 8vprord ymm14, ymm14, 8vpaddd ymm10, ymm10, ymm15vpaddd ymm11, ymm11, ymm12vpaddd ymm8, ymm8, ymm13vpaddd ymm9, ymm9, ymm14vpxord ymm5, ymm5, ymm10vpxord ymm6, ymm6, ymm11vpxord ymm7, ymm7, ymm8vpxord ymm4, ymm4, ymm9vprord ymm5, ymm5, 7vprord ymm6, ymm6, 7vprord ymm7, ymm7, 7vprord ymm4, ymm4, 7vpaddd ymm0, ymm0, ymm28vpaddd ymm1, ymm1, ymm25vpaddd ymm2, ymm2, ymm31vpaddd ymm3, ymm3, ymm30vpaddd ymm0, ymm0, ymm4vpaddd ymm1, ymm1, ymm5vpaddd ymm2, ymm2, ymm6vpaddd ymm3, ymm3, ymm7vpxord ymm12, ymm12, ymm0vpxord ymm13, ymm13, ymm1vpxord ymm14, ymm14, ymm2vpxord ymm15, ymm15, ymm3vprord ymm12, ymm12, 16vprord ymm13, ymm13, 16vprord ymm14, ymm14, 16vprord ymm15, ymm15, 16vpaddd ymm8, ymm8, ymm12vpaddd ymm9, 
ymm9, ymm13vpaddd ymm10, ymm10, ymm14vpaddd ymm11, ymm11, ymm15vpxord ymm4, ymm4, ymm8vpxord ymm5, ymm5, ymm9vpxord ymm6, ymm6, ymm10vpxord ymm7, ymm7, ymm11vprord ymm4, ymm4, 12vprord ymm5, ymm5, 12vprord ymm6, ymm6, 12vprord ymm7, ymm7, 12vpaddd ymm0, ymm0, ymm29vpaddd ymm1, ymm1, ymm27vpaddd ymm2, ymm2, ymm26vpaddd ymm3, ymm3, ymm24vpaddd ymm0, ymm0, ymm4vpaddd ymm1, ymm1, ymm5vpaddd ymm2, ymm2, ymm6vpaddd ymm3, ymm3, ymm7vpxord ymm12, ymm12, ymm0vpxord ymm13, ymm13, ymm1vpxord ymm14, ymm14, ymm2vpxord ymm15, ymm15, ymm3vprord ymm12, ymm12, 8vprord ymm13, ymm13, 8vprord ymm14, ymm14, 8vprord ymm15, ymm15, 8vpaddd ymm8, ymm8, ymm12vpaddd ymm9, ymm9, ymm13vpaddd ymm10, ymm10, ymm14vpaddd ymm11, ymm11, ymm15vpxord ymm4, ymm4, ymm8vpxord ymm5, ymm5, ymm9vpxord ymm6, ymm6, ymm10vpxord ymm7, ymm7, ymm11vprord ymm4, ymm4, 7vprord ymm5, ymm5, 7vprord ymm6, ymm6, 7vprord ymm7, ymm7, 7vpaddd ymm0, ymm0, ymm23vpaddd ymm1, ymm1, ymm21vpaddd ymm2, ymm2, ymm16vpaddd ymm3, ymm3, ymm22vpaddd ymm0, ymm0, ymm5vpaddd ymm1, ymm1, ymm6vpaddd ymm2, ymm2, ymm7vpaddd ymm3, ymm3, ymm4vpxord ymm15, ymm15, ymm0vpxord ymm12, ymm12, ymm1vpxord ymm13, ymm13, ymm2vpxord ymm14, ymm14, ymm3vprord ymm15, ymm15, 16vprord ymm12, ymm12, 16vprord ymm13, ymm13, 16vprord ymm14, ymm14, 16vpaddd ymm10, ymm10, ymm15vpaddd ymm11, ymm11, ymm12vpaddd ymm8, ymm8, ymm13vpaddd ymm9, ymm9, ymm14vpxord ymm5, ymm5, ymm10vpxord ymm6, ymm6, ymm11vpxord ymm7, ymm7, ymm8vpxord ymm4, ymm4, ymm9vprord ymm5, ymm5, 12vprord ymm6, ymm6, 12vprord ymm7, ymm7, 12vprord ymm4, ymm4, 12vpaddd ymm0, ymm0, ymm18vpaddd ymm1, ymm1, ymm19vpaddd ymm2, ymm2, ymm17vpaddd ymm3, ymm3, ymm20vpaddd ymm0, ymm0, ymm5vpaddd ymm1, ymm1, ymm6vpaddd ymm2, ymm2, ymm7vpaddd ymm3, ymm3, ymm4vpxord ymm15, ymm15, ymm0vpxord ymm12, ymm12, ymm1vpxord ymm13, ymm13, ymm2vpxord ymm14, ymm14, ymm3vprord ymm15, ymm15, 8vprord ymm12, ymm12, 8vprord ymm13, ymm13, 8vprord ymm14, ymm14, 8vpaddd ymm10, ymm10, ymm15vpaddd ymm11, ymm11, ymm12vpaddd ymm8, ymm8, 
ymm13vpaddd ymm9, ymm9, ymm14vpxord ymm5, ymm5, ymm10vpxord ymm6, ymm6, ymm11vpxord ymm7, ymm7, ymm8vpxord ymm4, ymm4, ymm9vprord ymm5, ymm5, 7vprord ymm6, ymm6, 7vprord ymm7, ymm7, 7vprord ymm4, ymm4, 7vpaddd ymm0, ymm0, ymm25vpaddd ymm1, ymm1, ymm27vpaddd ymm2, ymm2, ymm24vpaddd ymm3, ymm3, ymm31vpaddd ymm0, ymm0, ymm4vpaddd ymm1, ymm1, ymm5vpaddd ymm2, ymm2, ymm6vpaddd ymm3, ymm3, ymm7vpxord ymm12, ymm12, ymm0vpxord ymm13, ymm13, ymm1vpxord ymm14, ymm14, ymm2vpxord ymm15, ymm15, ymm3vprord ymm12, ymm12, 16vprord ymm13, ymm13, 16vprord ymm14, ymm14, 16vprord ymm15, ymm15, 16vpaddd ymm8, ymm8, ymm12vpaddd ymm9, ymm9, ymm13vpaddd ymm10, ymm10, ymm14vpaddd ymm11, ymm11, ymm15vpxord ymm4, ymm4, ymm8vpxord ymm5, ymm5, ymm9vpxord ymm6, ymm6, ymm10vpxord ymm7, ymm7, ymm11vprord ymm4, ymm4, 12vprord ymm5, ymm5, 12vprord ymm6, ymm6, 12vprord ymm7, ymm7, 12vpaddd ymm0, ymm0, ymm30vpaddd ymm1, ymm1, ymm21vpaddd ymm2, ymm2, ymm28vpaddd ymm3, ymm3, ymm17vpaddd ymm0, ymm0, ymm4vpaddd ymm1, ymm1, ymm5vpaddd ymm2, ymm2, ymm6vpaddd ymm3, ymm3, ymm7vpxord ymm12, ymm12, ymm0vpxord ymm13, ymm13, ymm1vpxord ymm14, ymm14, ymm2vpxord ymm15, ymm15, ymm3vprord ymm12, ymm12, 8vprord ymm13, ymm13, 8vprord ymm14, ymm14, 8vprord ymm15, ymm15, 8vpaddd ymm8, ymm8, ymm12vpaddd ymm9, ymm9, ymm13vpaddd ymm10, ymm10, ymm14vpaddd ymm11, ymm11, ymm15vpxord ymm4, ymm4, ymm8vpxord ymm5, ymm5, ymm9vpxord ymm6, ymm6, ymm10vpxord ymm7, ymm7, ymm11vprord ymm4, ymm4, 7vprord ymm5, ymm5, 7vprord ymm6, ymm6, 7vprord ymm7, ymm7, 7vpaddd ymm0, ymm0, ymm29vpaddd ymm1, ymm1, ymm16vpaddd ymm2, ymm2, ymm18vpaddd ymm3, ymm3, ymm20vpaddd ymm0, ymm0, ymm5vpaddd ymm1, ymm1, ymm6vpaddd ymm2, ymm2, ymm7vpaddd ymm3, ymm3, ymm4vpxord ymm15, ymm15, ymm0vpxord ymm12, ymm12, ymm1vpxord ymm13, ymm13, ymm2vpxord ymm14, ymm14, ymm3vprord ymm15, ymm15, 16vprord ymm12, ymm12, 16vprord ymm13, ymm13, 16vprord ymm14, ymm14, 16vpaddd ymm10, ymm10, ymm15vpaddd ymm11, ymm11, ymm12vpaddd ymm8, ymm8, ymm13vpaddd ymm9, ymm9, ymm14vpxord 
ymm5, ymm5, ymm10vpxord ymm6, ymm6, ymm11vpxord ymm7, ymm7, ymm8vpxord ymm4, ymm4, ymm9vprord ymm5, ymm5, 12vprord ymm6, ymm6, 12vprord ymm7, ymm7, 12vprord ymm4, ymm4, 12vpaddd ymm0, ymm0, ymm19vpaddd ymm1, ymm1, ymm26vpaddd ymm2, ymm2, ymm22vpaddd ymm3, ymm3, ymm23vpaddd ymm0, ymm0, ymm5vpaddd ymm1, ymm1, ymm6vpaddd ymm2, ymm2, ymm7vpaddd ymm3, ymm3, ymm4vpxord ymm15, ymm15, ymm0vpxord ymm12, ymm12, ymm1vpxord ymm13, ymm13, ymm2vpxord ymm14, ymm14, ymm3vprord ymm15, ymm15, 8vprord ymm12, ymm12, 8vprord ymm13, ymm13, 8vprord ymm14, ymm14, 8vpaddd ymm10, ymm10, ymm15vpaddd ymm11, ymm11, ymm12vpaddd ymm8, ymm8, ymm13vpaddd ymm9, ymm9, ymm14vpxord ymm5, ymm5, ymm10vpxord ymm6, ymm6, ymm11vpxord ymm7, ymm7, ymm8vpxord ymm4, ymm4, ymm9vprord ymm5, ymm5, 7vprord ymm6, ymm6, 7vprord ymm7, ymm7, 7vprord ymm4, ymm4, 7vpaddd ymm0, ymm0, ymm27vpaddd ymm1, ymm1, ymm21vpaddd ymm2, ymm2, ymm17vpaddd ymm3, ymm3, ymm24vpaddd ymm0, ymm0, ymm4vpaddd ymm1, ymm1, ymm5vpaddd ymm2, ymm2, ymm6vpaddd ymm3, ymm3, ymm7vpxord ymm12, ymm12, ymm0vpxord ymm13, ymm13, ymm1vpxord ymm14, ymm14, ymm2vpxord ymm15, ymm15, ymm3vprord ymm12, ymm12, 16vprord ymm13, ymm13, 16vprord ymm14, ymm14, 16vprord ymm15, ymm15, 16vpaddd ymm8, ymm8, ymm12vpaddd ymm9, ymm9, ymm13vpaddd ymm10, ymm10, ymm14vpaddd ymm11, ymm11, ymm15vpxord ymm4, ymm4, ymm8vpxord ymm5, ymm5, ymm9vpxord ymm6, ymm6, ymm10vpxord ymm7, ymm7, ymm11vprord ymm4, ymm4, 12vprord ymm5, ymm5, 12vprord ymm6, ymm6, 12vprord ymm7, ymm7, 12vpaddd ymm0, ymm0, ymm31vpaddd ymm1, ymm1, ymm16vpaddd ymm2, ymm2, ymm25vpaddd ymm3, ymm3, ymm22vpaddd ymm0, ymm0, ymm4vpaddd ymm1, ymm1, ymm5vpaddd ymm2, ymm2, ymm6vpaddd ymm3, ymm3, ymm7vpxord ymm12, ymm12, ymm0vpxord ymm13, ymm13, ymm1vpxord ymm14, ymm14, ymm2vpxord ymm15, ymm15, ymm3vprord ymm12, ymm12, 8vprord ymm13, ymm13, 8vprord ymm14, ymm14, 8vprord ymm15, ymm15, 8vpaddd ymm8, ymm8, ymm12vpaddd ymm9, ymm9, ymm13vpaddd ymm10, ymm10, ymm14vpaddd ymm11, ymm11, ymm15vpxord ymm4, ymm4, ymm8vpxord ymm5, ymm5, 
ymm9vpxord ymm6, ymm6, ymm10vpxord ymm7, ymm7, ymm11vprord ymm4, ymm4, 7vprord ymm5, ymm5, 7vprord ymm6, ymm6, 7vprord ymm7, ymm7, 7vpaddd ymm0, ymm0, ymm30vpaddd ymm1, ymm1, ymm18vpaddd ymm2, ymm2, ymm19vpaddd ymm3, ymm3, ymm23vpaddd ymm0, ymm0, ymm5vpaddd ymm1, ymm1, ymm6vpaddd ymm2, ymm2, ymm7vpaddd ymm3, ymm3, ymm4vpxord ymm15, ymm15, ymm0vpxord ymm12, ymm12, ymm1vpxord ymm13, ymm13, ymm2vpxord ymm14, ymm14, ymm3vprord ymm15, ymm15, 16vprord ymm12, ymm12, 16vprord ymm13, ymm13, 16vprord ymm14, ymm14, 16vpaddd ymm10, ymm10, ymm15vpaddd ymm11, ymm11, ymm12vpaddd ymm8, ymm8, ymm13vpaddd ymm9, ymm9, ymm14vpxord ymm5, ymm5, ymm10vpxord ymm6, ymm6, ymm11vpxord ymm7, ymm7, ymm8vpxord ymm4, ymm4, ymm9vprord ymm5, ymm5, 12vprord ymm6, ymm6, 12vprord ymm7, ymm7, 12vprord ymm4, ymm4, 12vpaddd ymm0, ymm0, ymm26vpaddd ymm1, ymm1, ymm28vpaddd ymm2, ymm2, ymm20vpaddd ymm3, ymm3, ymm29vpaddd ymm0, ymm0, ymm5vpaddd ymm1, ymm1, ymm6vpaddd ymm2, ymm2, ymm7vpaddd ymm3, ymm3, ymm4vpxord ymm15, ymm15, ymm0vpxord ymm12, ymm12, ymm1vpxord ymm13, ymm13, ymm2vpxord ymm14, ymm14, ymm3vprord ymm15, ymm15, 8vprord ymm12, ymm12, 8vprord ymm13, ymm13, 8vprord ymm14, ymm14, 8vpaddd ymm10, ymm10, ymm15vpaddd ymm11, ymm11, ymm12vpaddd ymm8, ymm8, ymm13vpaddd ymm9, ymm9, ymm14vpxord ymm5, ymm5, ymm10vpxord ymm6, ymm6, ymm11vpxord ymm7, ymm7, ymm8vpxord ymm4, ymm4, ymm9vprord ymm5, ymm5, 7vprord ymm6, ymm6, 7vprord ymm7, ymm7, 7vprord ymm4, ymm4, 7vpxor ymm0, ymm0, ymm8vpxor ymm1, ymm1, ymm9vpxor ymm2, ymm2, ymm10vpxor ymm3, ymm3, ymm11vpxor ymm4, ymm4, ymm12vpxor ymm5, ymm5, ymm13vpxor ymm6, ymm6, ymm14vpxor ymm7, ymm7, ymm15movzx eax, byte ptr [rbp+0x38]jne 2bmov rbx, qword ptr [rbp+0x50]vunpcklps ymm8, ymm0, ymm1vunpcklps ymm9, ymm2, ymm3vunpckhps ymm10, ymm0, ymm1vunpcklps ymm11, ymm4, ymm5vunpcklps ymm0, ymm6, ymm7vshufps ymm12, ymm8, ymm9, 78vblendps ymm1, ymm8, ymm12, 0xCCvshufps ymm8, ymm11, ymm0, 78vunpckhps ymm13, ymm2, ymm3vblendps ymm2, ymm11, ymm8, 0xCCvblendps ymm3, ymm12, ymm9, 
0xCCvperm2f128 ymm12, ymm1, ymm2, 0x20vmovups ymmword ptr [rbx], ymm12vunpckhps ymm14, ymm4, ymm5vblendps ymm4, ymm8, ymm0, 0xCCvunpckhps ymm15, ymm6, ymm7vperm2f128 ymm7, ymm3, ymm4, 0x20vmovups ymmword ptr [rbx+0x20], ymm7vshufps ymm5, ymm10, ymm13, 78vblendps ymm6, ymm5, ymm13, 0xCCvshufps ymm13, ymm14, ymm15, 78vblendps ymm10, ymm10, ymm5, 0xCCvblendps ymm14, ymm14, ymm13, 0xCCvperm2f128 ymm8, ymm10, ymm14, 0x20vmovups ymmword ptr [rbx+0x40], ymm8vblendps ymm15, ymm13, ymm15, 0xCCvperm2f128 ymm13, ymm6, ymm15, 0x20vmovups ymmword ptr [rbx+0x60], ymm13vperm2f128 ymm9, ymm1, ymm2, 0x31vperm2f128 ymm11, ymm3, ymm4, 0x31vmovups ymmword ptr [rbx+0x80], ymm9vperm2f128 ymm14, ymm10, ymm14, 0x31vperm2f128 ymm15, ymm6, ymm15, 0x31vmovups ymmword ptr [rbx+0xA0], ymm11vmovups ymmword ptr [rbx+0xC0], ymm14vmovups ymmword ptr [rbx+0xE0], ymm15vmovdqa ymm0, ymmword ptr [rsp]vmovdqa ymm2, ymmword ptr [rsp+0x2*0x20]vmovdqa32 ymm0 {k1}, ymmword ptr [rsp+0x1*0x20]vmovdqa32 ymm2 {k1}, ymmword ptr [rsp+0x3*0x20]vmovdqa ymmword ptr [rsp], ymm0vmovdqa ymmword ptr [rsp+0x2*0x20], ymm2add rbx, 256mov qword ptr [rbp+0x50], rbxadd rdi, 64sub rsi, 83:mov rbx, qword ptr [rbp+0x50]mov r15, qword ptr [rsp+0x80]movzx r13, byte ptr [rbp+0x38]movzx r12, byte ptr [rbp+0x48]test esi, 0x4je 3fvbroadcasti32x4 zmm0, xmmword ptr [rcx]vbroadcasti32x4 zmm1, xmmword ptr [rcx+0x1*0x10]vmovdqa xmm12, xmmword ptr [rsp]vmovdqa xmm13, xmmword ptr [rsp+0x4*0x10]vpunpckldq xmm14, xmm12, xmm13vpunpckhdq xmm15, xmm12, xmm13vpermq ymm14, ymm14, 0xDCvpermq ymm15, ymm15, 0xDCvpbroadcastd zmm12, dword ptr [BLAKE3_BLOCK_LEN+rip]vinserti64x4 zmm13, zmm14, ymm15, 0x01mov eax, 17476kmovw k2, eaxvpblendmd zmm13 {k2}, zmm13, zmm12vbroadcasti32x4 zmm15, xmmword ptr [BLAKE3_IV+rip]mov r8, qword ptr [rdi]mov r9, qword ptr [rdi+0x8]mov r10, qword ptr [rdi+0x10]mov r11, qword ptr [rdi+0x18]mov eax, 43690kmovw k3, eaxmov eax, 34952kmovw k4, eaxmovzx eax, byte ptr [rbp+0x40]or eax, r13dxor edx, edx.p2align 52:mov r14d, eaxor 
eax, r12dadd rdx, 64cmp rdx, r15cmovne eax, r14dmov dword ptr [rsp+0x88], eaxvmovdqa32 zmm2, zmm15vpbroadcastd zmm8, dword ptr [rsp+0x22*0x4]vpblendmd zmm3 {k4}, zmm13, zmm8vmovups zmm8, zmmword ptr [r8+rdx-0x1*0x40]vinserti32x4 zmm8, zmm8, xmmword ptr [r9+rdx-0x4*0x10], 0x01vinserti32x4 zmm8, zmm8, xmmword ptr [r10+rdx-0x4*0x10], 0x02vinserti32x4 zmm8, zmm8, xmmword ptr [r11+rdx-0x4*0x10], 0x03vmovups zmm9, zmmword ptr [r8+rdx-0x30]vinserti32x4 zmm9, zmm9, xmmword ptr [r9+rdx-0x3*0x10], 0x01vinserti32x4 zmm9, zmm9, xmmword ptr [r10+rdx-0x3*0x10], 0x02vinserti32x4 zmm9, zmm9, xmmword ptr [r11+rdx-0x3*0x10], 0x03vshufps zmm4, zmm8, zmm9, 136vshufps zmm5, zmm8, zmm9, 221vmovups zmm8, zmmword ptr [r8+rdx-0x20]vinserti32x4 zmm8, zmm8, xmmword ptr [r9+rdx-0x2*0x10], 0x01vinserti32x4 zmm8, zmm8, xmmword ptr [r10+rdx-0x2*0x10], 0x02vinserti32x4 zmm8, zmm8, xmmword ptr [r11+rdx-0x2*0x10], 0x03vmovups zmm9, zmmword ptr [r8+rdx-0x10]vinserti32x4 zmm9, zmm9, xmmword ptr [r9+rdx-0x1*0x10], 0x01vinserti32x4 zmm9, zmm9, xmmword ptr [r10+rdx-0x1*0x10], 0x02vinserti32x4 zmm9, zmm9, xmmword ptr [r11+rdx-0x1*0x10], 0x03vshufps zmm6, zmm8, zmm9, 136vshufps zmm7, zmm8, zmm9, 221vpshufd zmm6, zmm6, 0x93vpshufd zmm7, zmm7, 0x93mov al, 79:vpaddd zmm0, zmm0, zmm4vpaddd zmm0, zmm0, zmm1vpxord zmm3, zmm3, zmm0vprord zmm3, zmm3, 16vpaddd zmm2, zmm2, zmm3vpxord zmm1, zmm1, zmm2vprord zmm1, zmm1, 12vpaddd zmm0, zmm0, zmm5vpaddd zmm0, zmm0, zmm1vpxord zmm3, zmm3, zmm0vprord zmm3, zmm3, 8vpaddd zmm2, zmm2, zmm3vpxord zmm1, zmm1, zmm2vprord zmm1, zmm1, 7vpshufd zmm0, zmm0, 0x93vpshufd zmm3, zmm3, 0x4Evpshufd zmm2, zmm2, 0x39vpaddd zmm0, zmm0, zmm6vpaddd zmm0, zmm0, zmm1vpxord zmm3, zmm3, zmm0vprord zmm3, zmm3, 16vpaddd zmm2, zmm2, zmm3vpxord zmm1, zmm1, zmm2vprord zmm1, zmm1, 12vpaddd zmm0, zmm0, zmm7vpaddd zmm0, zmm0, zmm1vpxord zmm3, zmm3, zmm0vprord zmm3, zmm3, 8vpaddd zmm2, zmm2, zmm3vpxord zmm1, zmm1, zmm2vprord zmm1, zmm1, 7vpshufd zmm0, zmm0, 0x39vpshufd zmm3, zmm3, 0x4Evpshufd zmm2, zmm2, 
0x93dec aljz 9fvshufps zmm8, zmm4, zmm5, 214vpshufd zmm9, zmm4, 0x0Fvpshufd zmm4, zmm8, 0x39vshufps zmm8, zmm6, zmm7, 250vpblendmd zmm9 {k3}, zmm9, zmm8vpunpcklqdq zmm8, zmm7, zmm5vpblendmd zmm8 {k4}, zmm8, zmm6vpshufd zmm8, zmm8, 0x78vpunpckhdq zmm5, zmm5, zmm7vpunpckldq zmm6, zmm6, zmm5vpshufd zmm7, zmm6, 0x1Evmovdqa32 zmm5, zmm9vmovdqa32 zmm6, zmm8jmp 9b9:vpxord zmm0, zmm0, zmm2vpxord zmm1, zmm1, zmm3mov eax, r13dcmp rdx, r15jne 2bvmovdqu xmmword ptr [rbx], xmm0vmovdqu xmmword ptr [rbx+0x10], xmm1vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01vextracti32x4 xmmword ptr [rbx+0x4*0x10], zmm0, 0x02vextracti32x4 xmmword ptr [rbx+0x5*0x10], zmm1, 0x02vextracti32x4 xmmword ptr [rbx+0x6*0x10], zmm0, 0x03vextracti32x4 xmmword ptr [rbx+0x7*0x10], zmm1, 0x03vmovdqa xmm0, xmmword ptr [rsp]vmovdqa xmm2, xmmword ptr [rsp+0x40]vmovdqa32 xmm0 {k1}, xmmword ptr [rsp+0x1*0x10]vmovdqa32 xmm2 {k1}, xmmword ptr [rsp+0x5*0x10]vmovdqa xmmword ptr [rsp], xmm0vmovdqa xmmword ptr [rsp+0x40], xmm2add rbx, 128add rdi, 32sub rsi, 43:test esi, 0x2je 3fvbroadcasti128 ymm0, xmmword ptr [rcx]vbroadcasti128 ymm1, xmmword ptr [rcx+0x10]vmovd xmm13, dword ptr [rsp]vpinsrd xmm13, xmm13, dword ptr [rsp+0x40], 1vpinsrd xmm13, xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2vmovd xmm14, dword ptr [rsp+0x4]vpinsrd xmm14, xmm14, dword ptr [rsp+0x44], 1vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2vinserti128 ymm13, ymm13, xmm14, 0x01mov r8, qword ptr [rdi]mov r9, qword ptr [rdi+0x8]movzx eax, byte ptr [rbp+0x40]or eax, r13dxor edx, edx.p2align 52:mov r14d, eaxor eax, r12dadd rdx, 64cmp rdx, r15cmovne eax, r14dmov dword ptr [rsp+0x88], eaxvbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV+rip]vpbroadcastd ymm8, dword ptr [rsp+0x88]vpblendd ymm3, ymm13, ymm8, 0x88vmovups ymm8, ymmword ptr [r8+rdx-0x40]vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x40], 0x01vmovups ymm9, ymmword ptr [r8+rdx-0x30]vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x30], 0x01vshufps ymm4, 
ymm8, ymm9, 136vshufps ymm5, ymm8, ymm9, 221vmovups ymm8, ymmword ptr [r8+rdx-0x20]vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x20], 0x01vmovups ymm9, ymmword ptr [r8+rdx-0x10]vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x10], 0x01vshufps ymm6, ymm8, ymm9, 136vshufps ymm7, ymm8, ymm9, 221vpshufd ymm6, ymm6, 0x93vpshufd ymm7, ymm7, 0x93mov al, 79:vpaddd ymm0, ymm0, ymm4vpaddd ymm0, ymm0, ymm1vpxord ymm3, ymm3, ymm0vprord ymm3, ymm3, 16vpaddd ymm2, ymm2, ymm3vpxord ymm1, ymm1, ymm2vprord ymm1, ymm1, 12vpaddd ymm0, ymm0, ymm5vpaddd ymm0, ymm0, ymm1vpxord ymm3, ymm3, ymm0vprord ymm3, ymm3, 8vpaddd ymm2, ymm2, ymm3vpxord ymm1, ymm1, ymm2vprord ymm1, ymm1, 7vpshufd ymm0, ymm0, 0x93vpshufd ymm3, ymm3, 0x4Evpshufd ymm2, ymm2, 0x39vpaddd ymm0, ymm0, ymm6vpaddd ymm0, ymm0, ymm1vpxord ymm3, ymm3, ymm0vprord ymm3, ymm3, 16vpaddd ymm2, ymm2, ymm3vpxord ymm1, ymm1, ymm2vprord ymm1, ymm1, 12vpaddd ymm0, ymm0, ymm7vpaddd ymm0, ymm0, ymm1vpxord ymm3, ymm3, ymm0vprord ymm3, ymm3, 8vpaddd ymm2, ymm2, ymm3vpxord ymm1, ymm1, ymm2vprord ymm1, ymm1, 7vpshufd ymm0, ymm0, 0x39vpshufd ymm3, ymm3, 0x4Evpshufd ymm2, ymm2, 0x93dec aljz 9fvshufps ymm8, ymm4, ymm5, 214vpshufd ymm9, ymm4, 0x0Fvpshufd ymm4, ymm8, 0x39vshufps ymm8, ymm6, ymm7, 250vpblendd ymm9, ymm9, ymm8, 0xAAvpunpcklqdq ymm8, ymm7, ymm5vpblendd ymm8, ymm8, ymm6, 0x88vpshufd ymm8, ymm8, 0x78vpunpckhdq ymm5, ymm5, ymm7vpunpckldq ymm6, ymm6, ymm5vpshufd ymm7, ymm6, 0x1Evmovdqa ymm5, ymm9vmovdqa ymm6, ymm8jmp 9b9:vpxor ymm0, ymm0, ymm2vpxor ymm1, ymm1, ymm3mov eax, r13dcmp rdx, r15jne 2bvmovdqu xmmword ptr [rbx], xmm0vmovdqu xmmword ptr [rbx+0x10], xmm1vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01vmovdqa xmm0, xmmword ptr [rsp]vmovdqa xmm2, xmmword ptr [rsp+0x4*0x10]vmovdqu32 xmm0 {k1}, xmmword ptr [rsp+0x8]vmovdqu32 xmm2 {k1}, xmmword ptr [rsp+0x48]vmovdqa xmmword ptr [rsp], xmm0vmovdqa xmmword ptr [rsp+0x4*0x10], xmm2add rbx, 64add rdi, 16sub rsi, 23:test esi, 0x1je 4bvmovdqu xmm0, 
xmmword ptr [rcx]vmovdqu xmm1, xmmword ptr [rcx+0x10]vmovd xmm14, dword ptr [rsp]vpinsrd xmm14, xmm14, dword ptr [rsp+0x40], 1vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2vmovdqa xmm15, xmmword ptr [BLAKE3_IV+rip]mov r8, qword ptr [rdi]movzx eax, byte ptr [rbp+0x40]or eax, r13dxor edx, edx.p2align 52:mov r14d, eaxor eax, r12dadd rdx, 64cmp rdx, r15cmovne eax, r14dvpinsrd xmm3, xmm14, eax, 3vmovdqa xmm2, xmm15vmovups xmm8, xmmword ptr [r8+rdx-0x40]vmovups xmm9, xmmword ptr [r8+rdx-0x30]vshufps xmm4, xmm8, xmm9, 136vshufps xmm5, xmm8, xmm9, 221vmovups xmm8, xmmword ptr [r8+rdx-0x20]vmovups xmm9, xmmword ptr [r8+rdx-0x10]vshufps xmm6, xmm8, xmm9, 136vshufps xmm7, xmm8, xmm9, 221vpshufd xmm6, xmm6, 0x93vpshufd xmm7, xmm7, 0x93mov al, 79:vpaddd xmm0, xmm0, xmm4vpaddd xmm0, xmm0, xmm1vpxord xmm3, xmm3, xmm0vprord xmm3, xmm3, 16vpaddd xmm2, xmm2, xmm3vpxord xmm1, xmm1, xmm2vprord xmm1, xmm1, 12vpaddd xmm0, xmm0, xmm5vpaddd xmm0, xmm0, xmm1vpxord xmm3, xmm3, xmm0vprord xmm3, xmm3, 8vpaddd xmm2, xmm2, xmm3vpxord xmm1, xmm1, xmm2vprord xmm1, xmm1, 7vpshufd xmm0, xmm0, 0x93vpshufd xmm3, xmm3, 0x4Evpshufd xmm2, xmm2, 0x39vpaddd xmm0, xmm0, xmm6vpaddd xmm0, xmm0, xmm1vpxord xmm3, xmm3, xmm0vprord xmm3, xmm3, 16vpaddd xmm2, xmm2, xmm3vpxord xmm1, xmm1, xmm2vprord xmm1, xmm1, 12vpaddd xmm0, xmm0, xmm7vpaddd xmm0, xmm0, xmm1vpxord xmm3, xmm3, xmm0vprord xmm3, xmm3, 8vpaddd xmm2, xmm2, xmm3vpxord xmm1, xmm1, xmm2vprord xmm1, xmm1, 7vpshufd xmm0, xmm0, 0x39vpshufd xmm3, xmm3, 0x4Evpshufd xmm2, xmm2, 0x93dec aljz 9fvshufps xmm8, xmm4, xmm5, 214vpshufd xmm9, xmm4, 0x0Fvpshufd xmm4, xmm8, 0x39vshufps xmm8, xmm6, xmm7, 250vpblendd xmm9, xmm9, xmm8, 0xAAvpunpcklqdq xmm8, xmm7, xmm5vpblendd xmm8, xmm8, xmm6, 0x88vpshufd xmm8, xmm8, 0x78vpunpckhdq xmm5, xmm5, xmm7vpunpckldq xmm6, xmm6, xmm5vpshufd xmm7, xmm6, 0x1Evmovdqa xmm5, xmm9vmovdqa xmm6, xmm8jmp 9b9:vpxor xmm0, xmm0, xmm2vpxor xmm1, xmm1, xmm3mov eax, r13dcmp rdx, r15jne 2bvmovdqu xmmword ptr [rbx], xmm0vmovdqu xmmword 
ptr [rbx+0x10], xmm1
        /* The operand line above completes the store begun on the previous
         * line; the jump below ends the routine that precedes this one. */
        jmp 4b

/*
 * blake3_compress_in_place_avx512
 *
 * Single-block compression that overwrites the 32-byte chaining value.
 *
 * Register roles, as used below (they line up with the portable C
 * prototype blake3_compress_in_place_portable(cv, block, block_len,
 * counter, flags) under the System V AMD64 convention -- NOTE(review):
 * confirm against the header that declares this symbol):
 *   rdi  - chaining value, 32 bytes, read at entry and written at exit
 *   rsi  - message block, 64 bytes, read with unaligned loads
 *   dl   - block length (zero-extended)
 *   rcx  - 64-bit counter
 *   r8b  - flags (zero-extended)
 *
 * Working state, one 4-dword row per register:
 *   xmm0 = cv[0..3]
 *   xmm1 = cv[4..7]
 *   xmm2 = BLAKE3_IV
 *   xmm3 = { counter low, counter high, block_len, flags }
 * xmm4..xmm7 hold the message words; xmm8/xmm9 are scratch.
 */
.p2align 6
_blake3_compress_in_place_avx512:
blake3_compress_in_place_avx512:
        _CET_ENDBR
        vmovdqu xmm0, xmmword ptr [rdi]
        vmovdqu xmm1, xmmword ptr [rdi+0x10]
        /* rdx = block_len | (flags << 32): the high qword of row 3. */
        movzx eax, r8b
        movzx edx, dl
        shl rax, 32
        add rdx, rax
        /* xmm3 = { rcx (low qword), rdx (high qword) }. */
        vmovq xmm3, rcx
        vmovq xmm4, rdx
        vpunpcklqdq xmm3, xmm3, xmm4
        vmovaps xmm2, xmmword ptr [BLAKE3_IV+rip]
        /* Load the 64-byte block and split it into even-indexed (136) and
         * odd-indexed (221) dwords of each 32-byte half. */
        vmovups xmm8, xmmword ptr [rsi]
        vmovups xmm9, xmmword ptr [rsi+0x10]
        vshufps xmm4, xmm8, xmm9, 136
        vshufps xmm5, xmm8, xmm9, 221
        vmovups xmm8, xmmword ptr [rsi+0x20]
        vmovups xmm9, xmmword ptr [rsi+0x30]
        vshufps xmm6, xmm8, xmm9, 136
        vshufps xmm7, xmm8, xmm9, 221
        /* Rotate the second-half message vectors by one lane so they line
         * up with the rotated rows used in the second half-round. */
        vpshufd xmm6, xmm6, 0x93
        vpshufd xmm7, xmm7, 0x93
        /* al counts the remaining rounds (7 in total). */
        mov al, 7
9:
        /* First half-round: add / xor / rotate-right by 16, 12, 8, 7,
         * the same constants as g() in the portable implementation. */
        vpaddd xmm0, xmm0, xmm4
        vpaddd xmm0, xmm0, xmm1
        vpxord xmm3, xmm3, xmm0
        vprord xmm3, xmm3, 16
        vpaddd xmm2, xmm2, xmm3
        vpxord xmm1, xmm1, xmm2
        vprord xmm1, xmm1, 12
        vpaddd xmm0, xmm0, xmm5
        vpaddd xmm0, xmm0, xmm1
        vpxord xmm3, xmm3, xmm0
        vprord xmm3, xmm3, 8
        vpaddd xmm2, xmm2, xmm3
        vpxord xmm1, xmm1, xmm2
        vprord xmm1, xmm1, 7
        /* Rotate rows 0, 3 and 2 lane-wise (by 1, 2 and 3 lanes) so the
         * second half-round mixes a different grouping of state words. */
        vpshufd xmm0, xmm0, 0x93
        vpshufd xmm3, xmm3, 0x4E
        vpshufd xmm2, xmm2, 0x39
        /* Second half-round, same add / xor / rotate sequence. */
        vpaddd xmm0, xmm0, xmm6
        vpaddd xmm0, xmm0, xmm1
        vpxord xmm3, xmm3, xmm0
        vprord xmm3, xmm3, 16
        vpaddd xmm2, xmm2, xmm3
        vpxord xmm1, xmm1, xmm2
        vprord xmm1, xmm1, 12
        vpaddd xmm0, xmm0, xmm7
        vpaddd xmm0, xmm0, xmm1
        vpxord xmm3, xmm3, xmm0
        vprord xmm3, xmm3, 8
        vpaddd xmm2, xmm2, xmm3
        vpxord xmm1, xmm1, xmm2
        vprord xmm1, xmm1, 7
        /* Undo the lane rotations applied before the second half-round. */
        vpshufd xmm0, xmm0, 0x39
        vpshufd xmm3, xmm3, 0x4E
        vpshufd xmm2, xmm2, 0x93
        /* Leave the loop after the seventh round. */
        dec al
        jz 9f
        /* Shuffle the four message vectors in place for the next round.
         * NOTE(review): presumably the vector form of the per-round
         * message schedule (MSG_SCHEDULE in the portable code) -- confirm. */
        vshufps xmm8, xmm4, xmm5, 214
        vpshufd xmm9, xmm4, 0x0F
        vpshufd xmm4, xmm8, 0x39
        vshufps xmm8, xmm6, xmm7, 250
        vpblendd xmm9, xmm9, xmm8, 0xAA
        vpunpcklqdq xmm8, xmm7, xmm5
        vpblendd xmm8, xmm8, xmm6, 0x88
        vpshufd xmm8, xmm8, 0x78
        vpunpckhdq xmm5, xmm5, xmm7
        vpunpckldq xmm6, xmm6, xmm5
        vpshufd xmm7, xmm6, 0x1E
        vmovdqa xmm5, xmm9
        vmovdqa xmm6, xmm8
        jmp 9b
9:
        /* New chaining value = rows 0..1 xor rows 2..3, stored over the
         * input chaining value. */
        vpxor xmm0, xmm0, xmm2
        vpxor xmm1, xmm1, xmm3
        vmovdqu xmmword ptr [rdi], xmm0
        vmovdqu xmmword ptr [rdi+0x10], xmm1
        ret

/*
 * blake3_compress_xof_avx512
 *
 * Same single-block compression as above, but the full 64-byte result is
 * written to a separate buffer and the chaining value is only read.
 *
 * Register roles, as used below (they line up with the portable C
 * prototype blake3_compress_xof_portable(cv, block, block_len, counter,
 * flags, out) under the System V AMD64 convention -- NOTE(review):
 * confirm against the header that declares this symbol):
 *   rdi  - chaining value, 32 bytes, read only
 *   rsi  - message block, 64 bytes
 *   dl   - block length
 *   rcx  - 64-bit counter
 *   r8b  - flags
 *   r9   - output buffer, 64 bytes written
 */
.p2align 6
_blake3_compress_xof_avx512:
blake3_compress_xof_avx512:
        _CET_ENDBR
        vmovdqu xmm0, xmmword ptr [rdi]
        vmovdqu xmm1, xmmword ptr [rdi+0x10]
        /* rdx = block_len | (flags << 32): the high qword of row 3. */
        movzx eax, r8b
        movzx edx, dl
        shl rax, 32
        add rdx, rax
        /* xmm3 = { rcx (low qword), rdx (high qword) }. */
        vmovq xmm3, rcx
        vmovq xmm4, rdx
        vpunpcklqdq xmm3, xmm3, xmm4
        vmovaps xmm2, xmmword ptr [BLAKE3_IV+rip]
        /* Load the 64-byte block and split it into even-indexed (136) and
         * odd-indexed (221) dwords of each 32-byte half. */
        vmovups xmm8, xmmword ptr [rsi]
        vmovups xmm9, xmmword ptr [rsi+0x10]
        vshufps xmm4, xmm8, xmm9, 136
        vshufps xmm5, xmm8, xmm9, 221
        vmovups xmm8, xmmword ptr [rsi+0x20]
        vmovups xmm9, xmmword ptr [rsi+0x30]
        vshufps xmm6, xmm8, xmm9, 136
        vshufps xmm7, xmm8, xmm9, 221
        /* Rotate the second-half message vectors by one lane so they line
         * up with the rotated rows used in the second half-round. */
        vpshufd xmm6, xmm6, 0x93
        vpshufd xmm7, xmm7, 0x93
        /* al counts the remaining rounds (7 in total). */
        mov al, 7
9:
        /* First half-round: add / xor / rotate-right by 16, 12, 8, 7. */
        vpaddd xmm0, xmm0, xmm4
        vpaddd xmm0, xmm0, xmm1
        vpxord xmm3, xmm3, xmm0
        vprord xmm3, xmm3, 16
        vpaddd xmm2, xmm2, xmm3
        vpxord xmm1, xmm1, xmm2
        vprord xmm1, xmm1, 12
        vpaddd xmm0, xmm0, xmm5
        vpaddd xmm0, xmm0, xmm1
        vpxord xmm3, xmm3, xmm0
        vprord xmm3, xmm3, 8
        vpaddd xmm2, xmm2, xmm3
        vpxord xmm1, xmm1, xmm2
        vprord xmm1, xmm1, 7
        /* Rotate rows 0, 3 and 2 lane-wise before the second half-round. */
        vpshufd xmm0, xmm0, 0x93
        vpshufd xmm3, xmm3, 0x4E
        vpshufd xmm2, xmm2, 0x39
        /* Second half-round, same add / xor / rotate sequence. */
        vpaddd xmm0, xmm0, xmm6
        vpaddd xmm0, xmm0, xmm1
        vpxord xmm3, xmm3, xmm0
        vprord xmm3, xmm3, 16
        vpaddd xmm2, xmm2, xmm3
        vpxord xmm1, xmm1, xmm2
        vprord xmm1, xmm1, 12
        vpaddd xmm0, xmm0, xmm7
        vpaddd xmm0, xmm0, xmm1
        vpxord xmm3, xmm3, xmm0
        vprord xmm3, xmm3, 8
        vpaddd xmm2, xmm2, xmm3
        vpxord xmm1, xmm1, xmm2
        vprord xmm1, xmm1, 7
        /* Undo the lane rotations. */
        vpshufd xmm0, xmm0, 0x39
        vpshufd xmm3, xmm3, 0x4E
        vpshufd xmm2, xmm2, 0x93
        /* Leave the loop after the seventh round. */
        dec al
        jz 9f
        /* Shuffle the four message vectors in place for the next round.
         * NOTE(review): presumably the vector form of the per-round
         * message schedule (MSG_SCHEDULE in the portable code) -- confirm. */
        vshufps xmm8, xmm4, xmm5, 214
        vpshufd xmm9, xmm4, 0x0F
        vpshufd xmm4, xmm8, 0x39
        vshufps xmm8, xmm6, xmm7, 250
        vpblendd xmm9, xmm9, xmm8, 0xAA
        vpunpcklqdq xmm8, xmm7, xmm5
        vpblendd xmm8, xmm8, xmm6, 0x88
        vpshufd xmm8, xmm8, 0x78
        vpunpckhdq xmm5, xmm5, xmm7
        vpunpckldq xmm6, xmm6, xmm5
        vpshufd xmm7, xmm6, 0x1E
        vmovdqa xmm5, xmm9
        vmovdqa xmm6, xmm8
        jmp 9b
9:
        /* Low 32 output bytes: rows 0..1 xor rows 2..3. */
        vpxor xmm0, xmm0, xmm2
        vpxor xmm1, xmm1, xmm3
        /* High 32 output bytes: rows 2..3 xor the input chaining value. */
        vpxor xmm2, xmm2, [rdi]
        vpxor xmm3, xmm3, [rdi+0x10]
        vmovdqu xmmword ptr [r9], xmm0
        vmovdqu xmmword ptr [r9+0x10], xmm1
        vmovdqu xmmword ptr [r9+0x20], xmm2
        vmovdqu xmmword ptr [r9+0x30], xmm3
        ret

/*
 * Read-only constants. BLAKE3_BLOCK_LEN and BLAKE3_IV / BLAKE3_IV_0..3 are
 * referenced RIP-relative by the routines above and by the preceding
 * multi-input routine. INDEX0, INDEX1, ADD0, ADD1 and ADD16 are consumed
 * by code outside this section.
 */
#ifdef __APPLE__
.static_data
#else
.section .rodata
#endif
.p2align 6
INDEX0:
        .long 0, 1, 2, 3, 16, 17, 18, 19
        .long 8, 9, 10, 11, 24, 25, 26, 27
INDEX1:
        .long 4, 5, 6, 7, 20, 21, 22, 23
        .long 12, 13, 14, 15, 28, 29, 30, 31
ADD0:
        .long 0, 1, 2, 3, 4, 5, 6, 7
        .long 8, 9, 10, 11, 12, 13, 14, 15
ADD1: .long 1
ADD16: .long 16
BLAKE3_BLOCK_LEN:
        .long 64
/* 64-byte aligned so the aligned 16-byte loads of BLAKE3_IV are valid;
 * the _0.._3 labels name the individual dwords for broadcast loads. */
.p2align 6
BLAKE3_IV:
BLAKE3_IV_0:
        .long 0x6A09E667
BLAKE3_IV_1:
        .long 0xBB67AE85
BLAKE3_IV_2:
        .long 0x3C6EF372
BLAKE3_IV_3:
        .long 0xA54FF53A
#if defined(__ELF__) && defined(__linux__).section .note.GNU-stack,"",%progbits#endif#if defined(__ELF__) && defined(__CET__) && defined(__has_include)#if __has_include(<cet.h>)#include <cet.h>#endif#endif#if !defined(_CET_ENDBR)#define _CET_ENDBR#endif.intel_syntax noprefix.global _blake3_hash_many_avx2.global blake3_hash_many_avx2#ifdef __APPLE__.text#else.section .text#endif.p2align 6_blake3_hash_many_avx2:blake3_hash_many_avx2:_CET_ENDBRpush r15push r14push r13push r12push rbxpush rbpmov rbp, rspsub rsp, 680and rsp, 0xFFFFFFFFFFFFFFC0neg r9dvmovd xmm0, r9dvpbroadcastd ymm0, xmm0vmovdqa ymmword ptr [rsp+0x280], ymm0vpand ymm1, ymm0, ymmword ptr [ADD0+rip]vpand ymm2, ymm0, ymmword ptr [ADD1+rip]vmovdqa ymmword ptr [rsp+0x220], ymm2vmovd xmm2, r8dvpbroadcastd ymm2, xmm2vpaddd ymm2, ymm2, ymm1vmovdqa ymmword ptr [rsp+0x240], ymm2vpxor ymm1, ymm1, ymmword ptr [CMP_MSB_MASK+rip]vpxor ymm2, ymm2, ymmword ptr [CMP_MSB_MASK+rip]vpcmpgtd ymm2, ymm1, ymm2shr r8, 32vmovd xmm3, r8dvpbroadcastd ymm3, xmm3vpsubd ymm3, ymm3, ymm2vmovdqa ymmword ptr [rsp+0x260], ymm3shl rdx, 6mov qword ptr [rsp+0x2A0], rdxcmp rsi, 8jc 3f2:vpbroadcastd ymm0, dword ptr [rcx]vpbroadcastd ymm1, dword ptr [rcx+0x4]vpbroadcastd ymm2, dword ptr [rcx+0x8]vpbroadcastd ymm3, dword ptr [rcx+0xC]vpbroadcastd ymm4, dword ptr [rcx+0x10]vpbroadcastd ymm5, dword ptr [rcx+0x14]vpbroadcastd ymm6, dword ptr [rcx+0x18]vpbroadcastd ymm7, dword ptr [rcx+0x1C]mov r8, qword ptr [rdi]mov r9, qword ptr [rdi+0x8]mov r10, qword ptr [rdi+0x10]mov r11, qword ptr [rdi+0x18]mov r12, qword ptr [rdi+0x20]mov r13, qword ptr [rdi+0x28]mov r14, qword ptr [rdi+0x30]mov r15, qword ptr [rdi+0x38]movzx eax, byte ptr [rbp+0x38]movzx ebx, byte ptr [rbp+0x40]or eax, ebxxor edx, edx.p2align 59:movzx ebx, byte ptr [rbp+0x48]or ebx, eaxadd rdx, 64cmp rdx, qword ptr [rsp+0x2A0]cmove eax, ebxmov dword ptr [rsp+0x200], eaxvmovups xmm8, xmmword ptr [r8+rdx-0x40]vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x40], 0x01vmovups xmm9, xmmword ptr 
[r9+rdx-0x40]vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x40], 0x01vunpcklpd ymm12, ymm8, ymm9vunpckhpd ymm13, ymm8, ymm9vmovups xmm10, xmmword ptr [r10+rdx-0x40]vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x40], 0x01vmovups xmm11, xmmword ptr [r11+rdx-0x40]vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x40], 0x01vunpcklpd ymm14, ymm10, ymm11vunpckhpd ymm15, ymm10, ymm11vshufps ymm8, ymm12, ymm14, 136vmovaps ymmword ptr [rsp], ymm8vshufps ymm9, ymm12, ymm14, 221vmovaps ymmword ptr [rsp+0x20], ymm9vshufps ymm10, ymm13, ymm15, 136vmovaps ymmword ptr [rsp+0x40], ymm10vshufps ymm11, ymm13, ymm15, 221vmovaps ymmword ptr [rsp+0x60], ymm11vmovups xmm8, xmmword ptr [r8+rdx-0x30]vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x30], 0x01vmovups xmm9, xmmword ptr [r9+rdx-0x30]vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x30], 0x01vunpcklpd ymm12, ymm8, ymm9vunpckhpd ymm13, ymm8, ymm9vmovups xmm10, xmmword ptr [r10+rdx-0x30]vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x30], 0x01vmovups xmm11, xmmword ptr [r11+rdx-0x30]vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x30], 0x01vunpcklpd ymm14, ymm10, ymm11vunpckhpd ymm15, ymm10, ymm11vshufps ymm8, ymm12, ymm14, 136vmovaps ymmword ptr [rsp+0x80], ymm8vshufps ymm9, ymm12, ymm14, 221vmovaps ymmword ptr [rsp+0xA0], ymm9vshufps ymm10, ymm13, ymm15, 136vmovaps ymmword ptr [rsp+0xC0], ymm10vshufps ymm11, ymm13, ymm15, 221vmovaps ymmword ptr [rsp+0xE0], ymm11vmovups xmm8, xmmword ptr [r8+rdx-0x20]vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x20], 0x01vmovups xmm9, xmmword ptr [r9+rdx-0x20]vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x20], 0x01vunpcklpd ymm12, ymm8, ymm9vunpckhpd ymm13, ymm8, ymm9vmovups xmm10, xmmword ptr [r10+rdx-0x20]vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x20], 0x01vmovups xmm11, xmmword ptr [r11+rdx-0x20]vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x20], 0x01vunpcklpd ymm14, ymm10, ymm11vunpckhpd ymm15, ymm10, ymm11vshufps ymm8, ymm12, ymm14, 136vmovaps ymmword ptr [rsp+0x100], ymm8vshufps ymm9, 
ymm12, ymm14, 221vmovaps ymmword ptr [rsp+0x120], ymm9vshufps ymm10, ymm13, ymm15, 136vmovaps ymmword ptr [rsp+0x140], ymm10vshufps ymm11, ymm13, ymm15, 221vmovaps ymmword ptr [rsp+0x160], ymm11vmovups xmm8, xmmword ptr [r8+rdx-0x10]vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x10], 0x01vmovups xmm9, xmmword ptr [r9+rdx-0x10]vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x10], 0x01vunpcklpd ymm12, ymm8, ymm9vunpckhpd ymm13, ymm8, ymm9vmovups xmm10, xmmword ptr [r10+rdx-0x10]vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x10], 0x01vmovups xmm11, xmmword ptr [r11+rdx-0x10]vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x10], 0x01vunpcklpd ymm14, ymm10, ymm11vunpckhpd ymm15, ymm10, ymm11vshufps ymm8, ymm12, ymm14, 136vmovaps ymmword ptr [rsp+0x180], ymm8vshufps ymm9, ymm12, ymm14, 221vmovaps ymmword ptr [rsp+0x1A0], ymm9vshufps ymm10, ymm13, ymm15, 136vmovaps ymmword ptr [rsp+0x1C0], ymm10vshufps ymm11, ymm13, ymm15, 221vmovaps ymmword ptr [rsp+0x1E0], ymm11vpbroadcastd ymm15, dword ptr [rsp+0x200]prefetcht0 [r8+rdx+0x80]prefetcht0 [r12+rdx+0x80]prefetcht0 [r9+rdx+0x80]prefetcht0 [r13+rdx+0x80]prefetcht0 [r10+rdx+0x80]prefetcht0 [r14+rdx+0x80]prefetcht0 [r11+rdx+0x80]prefetcht0 [r15+rdx+0x80]vpaddd ymm0, ymm0, ymmword ptr [rsp]vpaddd ymm1, ymm1, ymmword ptr [rsp+0x40]vpaddd ymm2, ymm2, ymmword ptr [rsp+0x80]vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0]vpaddd ymm0, ymm0, ymm4vpaddd ymm1, ymm1, ymm5vpaddd ymm2, ymm2, ymm6vpaddd ymm3, ymm3, ymm7vpxor ymm12, ymm0, ymmword ptr [rsp+0x240]vpxor ymm13, ymm1, ymmword ptr [rsp+0x260]vpxor ymm14, ymm2, ymmword ptr [BLAKE3_BLOCK_LEN+rip]vpxor ymm15, ymm3, ymm15vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]vpshufb ymm12, ymm12, ymm8vpshufb ymm13, ymm13, ymm8vpshufb ymm14, ymm14, ymm8vpshufb ymm15, ymm15, ymm8vpaddd ymm8, ymm12, ymmword ptr [BLAKE3_IV_0+rip]vpaddd ymm9, ymm13, ymmword ptr [BLAKE3_IV_1+rip]vpaddd ymm10, ymm14, ymmword ptr [BLAKE3_IV_2+rip]vpaddd ymm11, ymm15, ymmword ptr [BLAKE3_IV_3+rip]vpxor ymm4, ymm4, ymm8vpxor 
ymm5, ymm5, ymm9vpxor ymm6, ymm6, ymm10vpxor ymm7, ymm7, ymm11vmovdqa ymmword ptr [rsp+0x200], ymm8vpsrld ymm8, ymm4, 12vpslld ymm4, ymm4, 20vpor ymm4, ymm4, ymm8vpsrld ymm8, ymm5, 12vpslld ymm5, ymm5, 20vpor ymm5, ymm5, ymm8vpsrld ymm8, ymm6, 12vpslld ymm6, ymm6, 20vpor ymm6, ymm6, ymm8vpsrld ymm8, ymm7, 12vpslld ymm7, ymm7, 20vpor ymm7, ymm7, ymm8vpaddd ymm0, ymm0, ymmword ptr [rsp+0x20]vpaddd ymm1, ymm1, ymmword ptr [rsp+0x60]vpaddd ymm2, ymm2, ymmword ptr [rsp+0xA0]vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0]vpaddd ymm0, ymm0, ymm4vpaddd ymm1, ymm1, ymm5vpaddd ymm2, ymm2, ymm6vpaddd ymm3, ymm3, ymm7vpxor ymm12, ymm12, ymm0vpxor ymm13, ymm13, ymm1vpxor ymm14, ymm14, ymm2vpxor ymm15, ymm15, ymm3vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]vpshufb ymm12, ymm12, ymm8vpshufb ymm13, ymm13, ymm8vpshufb ymm14, ymm14, ymm8vpshufb ymm15, ymm15, ymm8vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]vpaddd ymm9, ymm9, ymm13vpaddd ymm10, ymm10, ymm14vpaddd ymm11, ymm11, ymm15vpxor ymm4, ymm4, ymm8vpxor ymm5, ymm5, ymm9vpxor ymm6, ymm6, ymm10vpxor ymm7, ymm7, ymm11vmovdqa ymmword ptr [rsp+0x200], ymm8vpsrld ymm8, ymm4, 7vpslld ymm4, ymm4, 25vpor ymm4, ymm4, ymm8vpsrld ymm8, ymm5, 7vpslld ymm5, ymm5, 25vpor ymm5, ymm5, ymm8vpsrld ymm8, ymm6, 7vpslld ymm6, ymm6, 25vpor ymm6, ymm6, ymm8vpsrld ymm8, ymm7, 7vpslld ymm7, ymm7, 25vpor ymm7, ymm7, ymm8vpaddd ymm0, ymm0, ymmword ptr [rsp+0x100]vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140]vpaddd ymm2, ymm2, ymmword ptr [rsp+0x180]vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1C0]vpaddd ymm0, ymm0, ymm5vpaddd ymm1, ymm1, ymm6vpaddd ymm2, ymm2, ymm7vpaddd ymm3, ymm3, ymm4vpxor ymm15, ymm15, ymm0vpxor ymm12, ymm12, ymm1vpxor ymm13, ymm13, ymm2vpxor ymm14, ymm14, ymm3vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]vpshufb ymm15, ymm15, ymm8vpshufb ymm12, ymm12, ymm8vpshufb ymm13, ymm13, ymm8vpshufb ymm14, ymm14, ymm8vpaddd ymm10, ymm10, ymm15vpaddd ymm11, ymm11, ymm12vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]vpaddd ymm9, ymm9, ymm14vpxor ymm5, ymm5, ymm10vpxor 
ymm6, ymm6, ymm11vpxor ymm7, ymm7, ymm8vpxor ymm4, ymm4, ymm9vmovdqa ymmword ptr [rsp+0x200], ymm8vpsrld ymm8, ymm5, 12vpslld ymm5, ymm5, 20vpor ymm5, ymm5, ymm8vpsrld ymm8, ymm6, 12vpslld ymm6, ymm6, 20vpor ymm6, ymm6, ymm8vpsrld ymm8, ymm7, 12vpslld ymm7, ymm7, 20vpor ymm7, ymm7, ymm8vpsrld ymm8, ymm4, 12vpslld ymm4, ymm4, 20vpor ymm4, ymm4, ymm8vpaddd ymm0, ymm0, ymmword ptr [rsp+0x120]vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160]vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1A0]vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0]vpaddd ymm0, ymm0, ymm5vpaddd ymm1, ymm1, ymm6vpaddd ymm2, ymm2, ymm7vpaddd ymm3, ymm3, ymm4vpxor ymm15, ymm15, ymm0vpxor ymm12, ymm12, ymm1vpxor ymm13, ymm13, ymm2vpxor ymm14, ymm14, ymm3vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]vpshufb ymm15, ymm15, ymm8vpshufb ymm12, ymm12, ymm8vpshufb ymm13, ymm13, ymm8vpshufb ymm14, ymm14, ymm8vpaddd ymm10, ymm10, ymm15vpaddd ymm11, ymm11, ymm12vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]vpaddd ymm9, ymm9, ymm14vpxor ymm5, ymm5, ymm10vpxor ymm6, ymm6, ymm11vpxor ymm7, ymm7, ymm8vpxor ymm4, ymm4, ymm9vmovdqa ymmword ptr [rsp+0x200], ymm8vpsrld ymm8, ymm5, 7vpslld ymm5, ymm5, 25vpor ymm5, ymm5, ymm8vpsrld ymm8, ymm6, 7vpslld ymm6, ymm6, 25vpor ymm6, ymm6, ymm8vpsrld ymm8, ymm7, 7vpslld ymm7, ymm7, 25vpor ymm7, ymm7, ymm8vpsrld ymm8, ymm4, 7vpslld ymm4, ymm4, 25vpor ymm4, ymm4, ymm8vpaddd ymm0, ymm0, ymmword ptr [rsp+0x40]vpaddd ymm1, ymm1, ymmword ptr [rsp+0x60]vpaddd ymm2, ymm2, ymmword ptr [rsp+0xE0]vpaddd ymm3, ymm3, ymmword ptr [rsp+0x80]vpaddd ymm0, ymm0, ymm4vpaddd ymm1, ymm1, ymm5vpaddd ymm2, ymm2, ymm6vpaddd ymm3, ymm3, ymm7vpxor ymm12, ymm12, ymm0vpxor ymm13, ymm13, ymm1vpxor ymm14, ymm14, ymm2vpxor ymm15, ymm15, ymm3vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]vpshufb ymm12, ymm12, ymm8vpshufb ymm13, ymm13, ymm8vpshufb ymm14, ymm14, ymm8vpshufb ymm15, ymm15, ymm8vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]vpaddd ymm9, ymm9, ymm13vpaddd ymm10, ymm10, ymm14vpaddd ymm11, ymm11, ymm15vpxor ymm4, ymm4, ymm8vpxor ymm5, 
ymm5, ymm9vpxor ymm6, ymm6, ymm10vpxor ymm7, ymm7, ymm11vmovdqa ymmword ptr [rsp+0x200], ymm8vpsrld ymm8, ymm4, 12vpslld ymm4, ymm4, 20vpor ymm4, ymm4, ymm8vpsrld ymm8, ymm5, 12vpslld ymm5, ymm5, 20vpor ymm5, ymm5, ymm8vpsrld ymm8, ymm6, 12vpslld ymm6, ymm6, 20vpor ymm6, ymm6, ymm8vpsrld ymm8, ymm7, 12vpslld ymm7, ymm7, 20vpor ymm7, ymm7, ymm8vpaddd ymm0, ymm0, ymmword ptr [rsp+0xC0]vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140]vpaddd ymm2, ymm2, ymmword ptr [rsp]vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1A0]vpaddd ymm0, ymm0, ymm4vpaddd ymm1, ymm1, ymm5vpaddd ymm2, ymm2, ymm6vpaddd ymm3, ymm3, ymm7vpxor ymm12, ymm12, ymm0vpxor ymm13, ymm13, ymm1vpxor ymm14, ymm14, ymm2vpxor ymm15, ymm15, ymm3vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]vpshufb ymm12, ymm12, ymm8vpshufb ymm13, ymm13, ymm8vpshufb ymm14, ymm14, ymm8vpshufb ymm15, ymm15, ymm8vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]vpaddd ymm9, ymm9, ymm13vpaddd ymm10, ymm10, ymm14vpaddd ymm11, ymm11, ymm15vpxor ymm4, ymm4, ymm8vpxor ymm5, ymm5, ymm9vpxor ymm6, ymm6, ymm10vpxor ymm7, ymm7, ymm11vmovdqa ymmword ptr [rsp+0x200], ymm8vpsrld ymm8, ymm4, 7vpslld ymm4, ymm4, 25vpor ymm4, ymm4, ymm8vpsrld ymm8, ymm5, 7vpslld ymm5, ymm5, 25vpor ymm5, ymm5, ymm8vpsrld ymm8, ymm6, 7vpslld ymm6, ymm6, 25vpor ymm6, ymm6, ymm8vpsrld ymm8, ymm7, 7vpslld ymm7, ymm7, 25vpor ymm7, ymm7, ymm8vpaddd ymm0, ymm0, ymmword ptr [rsp+0x20]vpaddd ymm1, ymm1, ymmword ptr [rsp+0x180]vpaddd ymm2, ymm2, ymmword ptr [rsp+0x120]vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0]vpaddd ymm0, ymm0, ymm5vpaddd ymm1, ymm1, ymm6vpaddd ymm2, ymm2, ymm7vpaddd ymm3, ymm3, ymm4vpxor ymm15, ymm15, ymm0vpxor ymm12, ymm12, ymm1vpxor ymm13, ymm13, ymm2vpxor ymm14, ymm14, ymm3vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]vpshufb ymm15, ymm15, ymm8vpshufb ymm12, ymm12, ymm8vpshufb ymm13, ymm13, ymm8vpshufb ymm14, ymm14, ymm8vpaddd ymm10, ymm10, ymm15vpaddd ymm11, ymm11, ymm12vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]vpaddd ymm9, ymm9, ymm14vpxor ymm5, ymm5, ymm10vpxor ymm6, ymm6, 
ymm11vpxor ymm7, ymm7, ymm8vpxor ymm4, ymm4, ymm9vmovdqa ymmword ptr [rsp+0x200], ymm8vpsrld ymm8, ymm5, 12vpslld ymm5, ymm5, 20vpor ymm5, ymm5, ymm8vpsrld ymm8, ymm6, 12vpslld ymm6, ymm6, 20vpor ymm6, ymm6, ymm8vpsrld ymm8, ymm7, 12vpslld ymm7, ymm7, 20vpor ymm7, ymm7, ymm8vpsrld ymm8, ymm4, 12vpslld ymm4, ymm4, 20vpor ymm4, ymm4, ymm8vpaddd ymm0, ymm0, ymmword ptr [rsp+0x160]vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0]vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1C0]vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100]vpaddd ymm0, ymm0, ymm5vpaddd ymm1, ymm1, ymm6vpaddd ymm2, ymm2, ymm7vpaddd ymm3, ymm3, ymm4vpxor ymm15, ymm15, ymm0vpxor ymm12, ymm12, ymm1vpxor ymm13, ymm13, ymm2vpxor ymm14, ymm14, ymm3vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]vpshufb ymm15, ymm15, ymm8vpshufb ymm12, ymm12, ymm8vpshufb ymm13, ymm13, ymm8vpshufb ymm14, ymm14, ymm8vpaddd ymm10, ymm10, ymm15vpaddd ymm11, ymm11, ymm12vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]vpaddd ymm9, ymm9, ymm14vpxor ymm5, ymm5, ymm10vpxor ymm6, ymm6, ymm11vpxor ymm7, ymm7, ymm8vpxor ymm4, ymm4, ymm9vmovdqa ymmword ptr [rsp+0x200], ymm8vpsrld ymm8, ymm5, 7vpslld ymm5, ymm5, 25vpor ymm5, ymm5, ymm8vpsrld ymm8, ymm6, 7vpslld ymm6, ymm6, 25vpor ymm6, ymm6, ymm8vpsrld ymm8, ymm7, 7vpslld ymm7, ymm7, 25vpor ymm7, ymm7, ymm8vpsrld ymm8, ymm4, 7vpslld ymm4, ymm4, 25vpor ymm4, ymm4, ymm8vpaddd ymm0, ymm0, ymmword ptr [rsp+0x60]vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140]vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1A0]vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0]vpaddd ymm0, ymm0, ymm4vpaddd ymm1, ymm1, ymm5vpaddd ymm2, ymm2, ymm6vpaddd ymm3, ymm3, ymm7vpxor ymm12, ymm12, ymm0vpxor ymm13, ymm13, ymm1vpxor ymm14, ymm14, ymm2vpxor ymm15, ymm15, ymm3vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]vpshufb ymm12, ymm12, ymm8vpshufb ymm13, ymm13, ymm8vpshufb ymm14, ymm14, ymm8vpshufb ymm15, ymm15, ymm8vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]vpaddd ymm9, ymm9, ymm13vpaddd ymm10, ymm10, ymm14vpaddd ymm11, ymm11, ymm15vpxor ymm4, ymm4, ymm8vpxor ymm5, ymm5, 
ymm9vpxor ymm6, ymm6, ymm10vpxor ymm7, ymm7, ymm11vmovdqa ymmword ptr [rsp+0x200], ymm8vpsrld ymm8, ymm4, 12vpslld ymm4, ymm4, 20vpor ymm4, ymm4, ymm8vpsrld ymm8, ymm5, 12vpslld ymm5, ymm5, 20vpor ymm5, ymm5, ymm8vpsrld ymm8, ymm6, 12vpslld ymm6, ymm6, 20vpor ymm6, ymm6, ymm8vpsrld ymm8, ymm7, 12vpslld ymm7, ymm7, 20vpor ymm7, ymm7, ymm8vpaddd ymm0, ymm0, ymmword ptr [rsp+0x80]vpaddd ymm1, ymm1, ymmword ptr [rsp+0x180]vpaddd ymm2, ymm2, ymmword ptr [rsp+0x40]vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1C0]vpaddd ymm0, ymm0, ymm4vpaddd ymm1, ymm1, ymm5vpaddd ymm2, ymm2, ymm6vpaddd ymm3, ymm3, ymm7vpxor ymm12, ymm12, ymm0vpxor ymm13, ymm13, ymm1vpxor ymm14, ymm14, ymm2vpxor ymm15, ymm15, ymm3vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]vpshufb ymm12, ymm12, ymm8vpshufb ymm13, ymm13, ymm8vpshufb ymm14, ymm14, ymm8vpshufb ymm15, ymm15, ymm8vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]vpaddd ymm9, ymm9, ymm13vpaddd ymm10, ymm10, ymm14vpaddd ymm11, ymm11, ymm15vpxor ymm4, ymm4, ymm8vpxor ymm5, ymm5, ymm9vpxor ymm6, ymm6, ymm10vpxor ymm7, ymm7, ymm11vmovdqa ymmword ptr [rsp+0x200], ymm8vpsrld ymm8, ymm4, 7vpslld ymm4, ymm4, 25vpor ymm4, ymm4, ymm8vpsrld ymm8, ymm5, 7vpslld ymm5, ymm5, 25vpor ymm5, ymm5, ymm8vpsrld ymm8, ymm6, 7vpslld ymm6, ymm6, 25vpor ymm6, ymm6, ymm8vpsrld ymm8, ymm7, 7vpslld ymm7, ymm7, 25vpor ymm7, ymm7, ymm8vpaddd ymm0, ymm0, ymmword ptr [rsp+0xC0]vpaddd ymm1, ymm1, ymmword ptr [rsp+0x120]vpaddd ymm2, ymm2, ymmword ptr [rsp+0x160]vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100]vpaddd ymm0, ymm0, ymm5vpaddd ymm1, ymm1, ymm6vpaddd ymm2, ymm2, ymm7vpaddd ymm3, ymm3, ymm4vpxor ymm15, ymm15, ymm0vpxor ymm12, ymm12, ymm1vpxor ymm13, ymm13, ymm2vpxor ymm14, ymm14, ymm3vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]vpshufb ymm15, ymm15, ymm8vpshufb ymm12, ymm12, ymm8vpshufb ymm13, ymm13, ymm8vpshufb ymm14, ymm14, ymm8vpaddd ymm10, ymm10, ymm15vpaddd ymm11, ymm11, ymm12vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]vpaddd ymm9, ymm9, ymm14vpxor ymm5, ymm5, ymm10vpxor ymm6, ymm6, 
ymm11vpxor ymm7, ymm7, ymm8vpxor ymm4, ymm4, ymm9vmovdqa ymmword ptr [rsp+0x200], ymm8vpsrld ymm8, ymm5, 12vpslld ymm5, ymm5, 20vpor ymm5, ymm5, ymm8vpsrld ymm8, ymm6, 12vpslld ymm6, ymm6, 20vpor ymm6, ymm6, ymm8vpsrld ymm8, ymm7, 12vpslld ymm7, ymm7, 20vpor ymm7, ymm7, ymm8vpsrld ymm8, ymm4, 12vpslld ymm4, ymm4, 20vpor ymm4, ymm4, ymm8vpaddd ymm0, ymm0, ymmword ptr [rsp+0xA0]vpaddd ymm1, ymm1, ymmword ptr [rsp]vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1E0]vpaddd ymm3, ymm3, ymmword ptr [rsp+0x20]vpaddd ymm0, ymm0, ymm5vpaddd ymm1, ymm1, ymm6vpaddd ymm2, ymm2, ymm7vpaddd ymm3, ymm3, ymm4vpxor ymm15, ymm15, ymm0vpxor ymm12, ymm12, ymm1vpxor ymm13, ymm13, ymm2vpxor ymm14, ymm14, ymm3vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]vpshufb ymm15, ymm15, ymm8vpshufb ymm12, ymm12, ymm8vpshufb ymm13, ymm13, ymm8vpshufb ymm14, ymm14, ymm8vpaddd ymm10, ymm10, ymm15vpaddd ymm11, ymm11, ymm12vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]vpaddd ymm9, ymm9, ymm14vpxor ymm5, ymm5, ymm10vpxor ymm6, ymm6, ymm11vpxor ymm7, ymm7, ymm8vpxor ymm4, ymm4, ymm9vmovdqa ymmword ptr [rsp+0x200], ymm8vpsrld ymm8, ymm5, 7vpslld ymm5, ymm5, 25vpor ymm5, ymm5, ymm8vpsrld ymm8, ymm6, 7vpslld ymm6, ymm6, 25vpor ymm6, ymm6, ymm8vpsrld ymm8, ymm7, 7vpslld ymm7, ymm7, 25vpor ymm7, ymm7, ymm8vpsrld ymm8, ymm4, 7vpslld ymm4, ymm4, 25vpor ymm4, ymm4, ymm8vpaddd ymm0, ymm0, ymmword ptr [rsp+0x140]vpaddd ymm1, ymm1, ymmword ptr [rsp+0x180]vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1C0]vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1A0]vpaddd ymm0, ymm0, ymm4vpaddd ymm1, ymm1, ymm5vpaddd ymm2, ymm2, ymm6vpaddd ymm3, ymm3, ymm7vpxor ymm12, ymm12, ymm0vpxor ymm13, ymm13, ymm1vpxor ymm14, ymm14, ymm2vpxor ymm15, ymm15, ymm3vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]vpshufb ymm12, ymm12, ymm8vpshufb ymm13, ymm13, ymm8vpshufb ymm14, ymm14, ymm8vpshufb ymm15, ymm15, ymm8vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]vpaddd ymm9, ymm9, ymm13vpaddd ymm10, ymm10, ymm14vpaddd ymm11, ymm11, ymm15vpxor ymm4, ymm4, ymm8vpxor ymm5, ymm5, ymm9vpxor 
ymm6, ymm6, ymm10vpxor ymm7, ymm7, ymm11vmovdqa ymmword ptr [rsp+0x200], ymm8vpsrld ymm8, ymm4, 12vpslld ymm4, ymm4, 20vpor ymm4, ymm4, ymm8vpsrld ymm8, ymm5, 12vpslld ymm5, ymm5, 20vpor ymm5, ymm5, ymm8vpsrld ymm8, ymm6, 12vpslld ymm6, ymm6, 20vpor ymm6, ymm6, ymm8vpsrld ymm8, ymm7, 12vpslld ymm7, ymm7, 20vpor ymm7, ymm7, ymm8vpaddd ymm0, ymm0, ymmword ptr [rsp+0xE0]vpaddd ymm1, ymm1, ymmword ptr [rsp+0x120]vpaddd ymm2, ymm2, ymmword ptr [rsp+0x60]vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0]vpaddd ymm0, ymm0, ymm4vpaddd ymm1, ymm1, ymm5vpaddd ymm2, ymm2, ymm6vpaddd ymm3, ymm3, ymm7vpxor ymm12, ymm12, ymm0vpxor ymm13, ymm13, ymm1vpxor ymm14, ymm14, ymm2vpxor ymm15, ymm15, ymm3vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]vpshufb ymm12, ymm12, ymm8vpshufb ymm13, ymm13, ymm8vpshufb ymm14, ymm14, ymm8vpshufb ymm15, ymm15, ymm8vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]vpaddd ymm9, ymm9, ymm13vpaddd ymm10, ymm10, ymm14vpaddd ymm11, ymm11, ymm15vpxor ymm4, ymm4, ymm8vpxor ymm5, ymm5, ymm9vpxor ymm6, ymm6, ymm10vpxor ymm7, ymm7, ymm11vmovdqa ymmword ptr [rsp+0x200], ymm8vpsrld ymm8, ymm4, 7vpslld ymm4, ymm4, 25vpor ymm4, ymm4, ymm8vpsrld ymm8, ymm5, 7vpslld ymm5, ymm5, 25vpor ymm5, ymm5, ymm8vpsrld ymm8, ymm6, 7vpslld ymm6, ymm6, 25vpor ymm6, ymm6, ymm8vpsrld ymm8, ymm7, 7vpslld ymm7, ymm7, 25vpor ymm7, ymm7, ymm8vpaddd ymm0, ymm0, ymmword ptr [rsp+0x80]vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160]vpaddd ymm2, ymm2, ymmword ptr [rsp+0xA0]vpaddd ymm3, ymm3, ymmword ptr [rsp+0x20]vpaddd ymm0, ymm0, ymm5vpaddd ymm1, ymm1, ymm6vpaddd ymm2, ymm2, ymm7vpaddd ymm3, ymm3, ymm4vpxor ymm15, ymm15, ymm0vpxor ymm12, ymm12, ymm1vpxor ymm13, ymm13, ymm2vpxor ymm14, ymm14, ymm3vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]vpshufb ymm15, ymm15, ymm8vpshufb ymm12, ymm12, ymm8vpshufb ymm13, ymm13, ymm8vpshufb ymm14, ymm14, ymm8vpaddd ymm10, ymm10, ymm15vpaddd ymm11, ymm11, ymm12vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]vpaddd ymm9, ymm9, ymm14vpxor ymm5, ymm5, ymm10vpxor ymm6, ymm6, ymm11vpxor 
ymm7, ymm7, ymm8vpxor ymm4, ymm4, ymm9vmovdqa ymmword ptr [rsp+0x200], ymm8vpsrld ymm8, ymm5, 12vpslld ymm5, ymm5, 20vpor ymm5, ymm5, ymm8vpsrld ymm8, ymm6, 12vpslld ymm6, ymm6, 20vpor ymm6, ymm6, ymm8vpsrld ymm8, ymm7, 12vpslld ymm7, ymm7, 20vpor ymm7, ymm7, ymm8vpsrld ymm8, ymm4, 12vpslld ymm4, ymm4, 20vpor ymm4, ymm4, ymm8vpaddd ymm0, ymm0, ymmword ptr [rsp]vpaddd ymm1, ymm1, ymmword ptr [rsp+0x40]vpaddd ymm2, ymm2, ymmword ptr [rsp+0x100]vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0]vpaddd ymm0, ymm0, ymm5vpaddd ymm1, ymm1, ymm6vpaddd ymm2, ymm2, ymm7vpaddd ymm3, ymm3, ymm4vpxor ymm15, ymm15, ymm0vpxor ymm12, ymm12, ymm1vpxor ymm13, ymm13, ymm2vpxor ymm14, ymm14, ymm3vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]vpshufb ymm15, ymm15, ymm8vpshufb ymm12, ymm12, ymm8vpshufb ymm13, ymm13, ymm8vpshufb ymm14, ymm14, ymm8vpaddd ymm10, ymm10, ymm15vpaddd ymm11, ymm11, ymm12vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]vpaddd ymm9, ymm9, ymm14vpxor ymm5, ymm5, ymm10vpxor ymm6, ymm6, ymm11vpxor ymm7, ymm7, ymm8vpxor ymm4, ymm4, ymm9vmovdqa ymmword ptr [rsp+0x200], ymm8vpsrld ymm8, ymm5, 7vpslld ymm5, ymm5, 25vpor ymm5, ymm5, ymm8vpsrld ymm8, ymm6, 7vpslld ymm6, ymm6, 25vpor ymm6, ymm6, ymm8vpsrld ymm8, ymm7, 7vpslld ymm7, ymm7, 25vpor ymm7, ymm7, ymm8vpsrld ymm8, ymm4, 7vpslld ymm4, ymm4, 25vpor ymm4, ymm4, ymm8vpaddd ymm0, ymm0, ymmword ptr [rsp+0x180]vpaddd ymm1, ymm1, ymmword ptr [rsp+0x120]vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1E0]vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1C0]vpaddd ymm0, ymm0, ymm4vpaddd ymm1, ymm1, ymm5vpaddd ymm2, ymm2, ymm6vpaddd ymm3, ymm3, ymm7vpxor ymm12, ymm12, ymm0vpxor ymm13, ymm13, ymm1vpxor ymm14, ymm14, ymm2vpxor ymm15, ymm15, ymm3vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]vpshufb ymm12, ymm12, ymm8vpshufb ymm13, ymm13, ymm8vpshufb ymm14, ymm14, ymm8vpshufb ymm15, ymm15, ymm8vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]vpaddd ymm9, ymm9, ymm13vpaddd ymm10, ymm10, ymm14vpaddd ymm11, ymm11, ymm15vpxor ymm4, ymm4, ymm8vpxor ymm5, ymm5, ymm9vpxor ymm6, 
ymm6, ymm10vpxor ymm7, ymm7, ymm11vmovdqa ymmword ptr [rsp+0x200], ymm8vpsrld ymm8, ymm4, 12vpslld ymm4, ymm4, 20vpor ymm4, ymm4, ymm8vpsrld ymm8, ymm5, 12vpslld ymm5, ymm5, 20vpor ymm5, ymm5, ymm8vpsrld ymm8, ymm6, 12vpslld ymm6, ymm6, 20vpor ymm6, ymm6, ymm8vpsrld ymm8, ymm7, 12vpslld ymm7, ymm7, 20vpor ymm7, ymm7, ymm8vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1A0]vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160]vpaddd ymm2, ymm2, ymmword ptr [rsp+0x140]vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100]vpaddd ymm0, ymm0, ymm4vpaddd ymm1, ymm1, ymm5vpaddd ymm2, ymm2, ymm6vpaddd ymm3, ymm3, ymm7vpxor ymm12, ymm12, ymm0vpxor ymm13, ymm13, ymm1vpxor ymm14, ymm14, ymm2vpxor ymm15, ymm15, ymm3vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]vpshufb ymm12, ymm12, ymm8vpshufb ymm13, ymm13, ymm8vpshufb ymm14, ymm14, ymm8vpshufb ymm15, ymm15, ymm8vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]vpaddd ymm9, ymm9, ymm13vpaddd ymm10, ymm10, ymm14vpaddd ymm11, ymm11, ymm15vpxor ymm4, ymm4, ymm8vpxor ymm5, ymm5, ymm9vpxor ymm6, ymm6, ymm10vpxor ymm7, ymm7, ymm11vmovdqa ymmword ptr [rsp+0x200], ymm8vpsrld ymm8, ymm4, 7vpslld ymm4, ymm4, 25vpor ymm4, ymm4, ymm8vpsrld ymm8, ymm5, 7vpslld ymm5, ymm5, 25vpor ymm5, ymm5, ymm8vpsrld ymm8, ymm6, 7vpslld ymm6, ymm6, 25vpor ymm6, ymm6, ymm8vpsrld ymm8, ymm7, 7vpslld ymm7, ymm7, 25vpor ymm7, ymm7, ymm8vpaddd ymm0, ymm0, ymmword ptr [rsp+0xE0]vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0]vpaddd ymm2, ymm2, ymmword ptr [rsp]vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0]vpaddd ymm0, ymm0, ymm5vpaddd ymm1, ymm1, ymm6vpaddd ymm2, ymm2, ymm7vpaddd ymm3, ymm3, ymm4vpxor ymm15, ymm15, ymm0vpxor ymm12, ymm12, ymm1vpxor ymm13, ymm13, ymm2vpxor ymm14, ymm14, ymm3vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]vpshufb ymm15, ymm15, ymm8vpshufb ymm12, ymm12, ymm8vpshufb ymm13, ymm13, ymm8vpshufb ymm14, ymm14, ymm8vpaddd ymm10, ymm10, ymm15vpaddd ymm11, ymm11, ymm12vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]vpaddd ymm9, ymm9, ymm14vpxor ymm5, ymm5, ymm10vpxor ymm6, ymm6, ymm11vpxor ymm7, ymm7, 
ymm8vpxor ymm4, ymm4, ymm9vmovdqa ymmword ptr [rsp+0x200], ymm8vpsrld ymm8, ymm5, 12vpslld ymm5, ymm5, 20vpor ymm5, ymm5, ymm8vpsrld ymm8, ymm6, 12vpslld ymm6, ymm6, 20vpor ymm6, ymm6, ymm8vpsrld ymm8, ymm7, 12vpslld ymm7, ymm7, 20vpor ymm7, ymm7, ymm8vpsrld ymm8, ymm4, 12vpslld ymm4, ymm4, 20vpor ymm4, ymm4, ymm8vpaddd ymm0, ymm0, ymmword ptr [rsp+0x40]vpaddd ymm1, ymm1, ymmword ptr [rsp+0x60]vpaddd ymm2, ymm2, ymmword ptr [rsp+0x20]vpaddd ymm3, ymm3, ymmword ptr [rsp+0x80]vpaddd ymm0, ymm0, ymm5vpaddd ymm1, ymm1, ymm6vpaddd ymm2, ymm2, ymm7vpaddd ymm3, ymm3, ymm4vpxor ymm15, ymm15, ymm0vpxor ymm12, ymm12, ymm1vpxor ymm13, ymm13, ymm2vpxor ymm14, ymm14, ymm3vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]vpshufb ymm15, ymm15, ymm8vpshufb ymm12, ymm12, ymm8vpshufb ymm13, ymm13, ymm8vpshufb ymm14, ymm14, ymm8vpaddd ymm10, ymm10, ymm15vpaddd ymm11, ymm11, ymm12vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]vpaddd ymm9, ymm9, ymm14vpxor ymm5, ymm5, ymm10vpxor ymm6, ymm6, ymm11vpxor ymm7, ymm7, ymm8vpxor ymm4, ymm4, ymm9vmovdqa ymmword ptr [rsp+0x200], ymm8vpsrld ymm8, ymm5, 7vpslld ymm5, ymm5, 25vpor ymm5, ymm5, ymm8vpsrld ymm8, ymm6, 7vpslld ymm6, ymm6, 25vpor ymm6, ymm6, ymm8vpsrld ymm8, ymm7, 7vpslld ymm7, ymm7, 25vpor ymm7, ymm7, ymm8vpsrld ymm8, ymm4, 7vpslld ymm4, ymm4, 25vpor ymm4, ymm4, ymm8vpaddd ymm0, ymm0, ymmword ptr [rsp+0x120]vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160]vpaddd ymm2, ymm2, ymmword ptr [rsp+0x100]vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0]vpaddd ymm0, ymm0, ymm4vpaddd ymm1, ymm1, ymm5vpaddd ymm2, ymm2, ymm6vpaddd ymm3, ymm3, ymm7vpxor ymm12, ymm12, ymm0vpxor ymm13, ymm13, ymm1vpxor ymm14, ymm14, ymm2vpxor ymm15, ymm15, ymm3vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]vpshufb ymm12, ymm12, ymm8vpshufb ymm13, ymm13, ymm8vpshufb ymm14, ymm14, ymm8vpshufb ymm15, ymm15, ymm8vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]vpaddd ymm9, ymm9, ymm13vpaddd ymm10, ymm10, ymm14vpaddd ymm11, ymm11, ymm15vpxor ymm4, ymm4, ymm8vpxor ymm5, ymm5, ymm9vpxor ymm6, ymm6, 
ymm10vpxor ymm7, ymm7, ymm11vmovdqa ymmword ptr [rsp+0x200], ymm8vpsrld ymm8, ymm4, 12vpslld ymm4, ymm4, 20vpor ymm4, ymm4, ymm8vpsrld ymm8, ymm5, 12vpslld ymm5, ymm5, 20vpor ymm5, ymm5, ymm8vpsrld ymm8, ymm6, 12vpslld ymm6, ymm6, 20vpor ymm6, ymm6, ymm8vpsrld ymm8, ymm7, 12vpslld ymm7, ymm7, 20vpor ymm7, ymm7, ymm8vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1C0]vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0]vpaddd ymm2, ymm2, ymmword ptr [rsp+0x180]vpaddd ymm3, ymm3, ymmword ptr [rsp+0x20]vpaddd ymm0, ymm0, ymm4vpaddd ymm1, ymm1, ymm5vpaddd ymm2, ymm2, ymm6vpaddd ymm3, ymm3, ymm7vpxor ymm12, ymm12, ymm0vpxor ymm13, ymm13, ymm1vpxor ymm14, ymm14, ymm2vpxor ymm15, ymm15, ymm3vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]vpshufb ymm12, ymm12, ymm8vpshufb ymm13, ymm13, ymm8vpshufb ymm14, ymm14, ymm8vpshufb ymm15, ymm15, ymm8vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]vpaddd ymm9, ymm9, ymm13vpaddd ymm10, ymm10, ymm14vpaddd ymm11, ymm11, ymm15vpxor ymm4, ymm4, ymm8vpxor ymm5, ymm5, ymm9vpxor ymm6, ymm6, ymm10vpxor ymm7, ymm7, ymm11vmovdqa ymmword ptr [rsp+0x200], ymm8vpsrld ymm8, ymm4, 7vpslld ymm4, ymm4, 25vpor ymm4, ymm4, ymm8vpsrld ymm8, ymm5, 7vpslld ymm5, ymm5, 25vpor ymm5, ymm5, ymm8vpsrld ymm8, ymm6, 7vpslld ymm6, ymm6, 25vpor ymm6, ymm6, ymm8vpsrld ymm8, ymm7, 7vpslld ymm7, ymm7, 25vpor ymm7, ymm7, ymm8vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1A0]vpaddd ymm1, ymm1, ymmword ptr [rsp]vpaddd ymm2, ymm2, ymmword ptr [rsp+0x40]vpaddd ymm3, ymm3, ymmword ptr [rsp+0x80]vpaddd ymm0, ymm0, ymm5vpaddd ymm1, ymm1, ymm6vpaddd ymm2, ymm2, ymm7vpaddd ymm3, ymm3, ymm4vpxor ymm15, ymm15, ymm0vpxor ymm12, ymm12, ymm1vpxor ymm13, ymm13, ymm2vpxor ymm14, ymm14, ymm3vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]vpshufb ymm15, ymm15, ymm8vpshufb ymm12, ymm12, ymm8vpshufb ymm13, ymm13, ymm8vpshufb ymm14, ymm14, ymm8vpaddd ymm10, ymm10, ymm15vpaddd ymm11, ymm11, ymm12vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]vpaddd ymm9, ymm9, ymm14vpxor ymm5, ymm5, ymm10vpxor ymm6, ymm6, ymm11vpxor ymm7, ymm7, 
ymm8vpxor ymm4, ymm4, ymm9vmovdqa ymmword ptr [rsp+0x200], ymm8vpsrld ymm8, ymm5, 12vpslld ymm5, ymm5, 20vpor ymm5, ymm5, ymm8vpsrld ymm8, ymm6, 12vpslld ymm6, ymm6, 20vpor ymm6, ymm6, ymm8vpsrld ymm8, ymm7, 12vpslld ymm7, ymm7, 20vpor ymm7, ymm7, ymm8vpsrld ymm8, ymm4, 12vpslld ymm4, ymm4, 20vpor ymm4, ymm4, ymm8vpaddd ymm0, ymm0, ymmword ptr [rsp+0x60]vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140]vpaddd ymm2, ymm2, ymmword ptr [rsp+0xC0]vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0]vpaddd ymm0, ymm0, ymm5vpaddd ymm1, ymm1, ymm6vpaddd ymm2, ymm2, ymm7vpaddd ymm3, ymm3, ymm4vpxor ymm15, ymm15, ymm0vpxor ymm12, ymm12, ymm1vpxor ymm13, ymm13, ymm2vpxor ymm14, ymm14, ymm3vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]vpshufb ymm15, ymm15, ymm8vpshufb ymm12, ymm12, ymm8vpshufb ymm13, ymm13, ymm8vpshufb ymm14, ymm14, ymm8vpaddd ymm10, ymm10, ymm15vpaddd ymm11, ymm11, ymm12vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]vpaddd ymm9, ymm9, ymm14vpxor ymm5, ymm5, ymm10vpxor ymm6, ymm6, ymm11vpxor ymm7, ymm7, ymm8vpxor ymm4, ymm4, ymm9vmovdqa ymmword ptr [rsp+0x200], ymm8vpsrld ymm8, ymm5, 7vpslld ymm5, ymm5, 25vpor ymm5, ymm5, ymm8vpsrld ymm8, ymm6, 7vpslld ymm6, ymm6, 25vpor ymm6, ymm6, ymm8vpsrld ymm8, ymm7, 7vpslld ymm7, ymm7, 25vpor ymm7, ymm7, ymm8vpsrld ymm8, ymm4, 7vpslld ymm4, ymm4, 25vpor ymm4, ymm4, ymm8vpaddd ymm0, ymm0, ymmword ptr [rsp+0x160]vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0]vpaddd ymm2, ymm2, ymmword ptr [rsp+0x20]vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100]vpaddd ymm0, ymm0, ymm4vpaddd ymm1, ymm1, ymm5vpaddd ymm2, ymm2, ymm6vpaddd ymm3, ymm3, ymm7vpxor ymm12, ymm12, ymm0vpxor ymm13, ymm13, ymm1vpxor ymm14, ymm14, ymm2vpxor ymm15, ymm15, ymm3vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]vpshufb ymm12, ymm12, ymm8vpshufb ymm13, ymm13, ymm8vpshufb ymm14, ymm14, ymm8vpshufb ymm15, ymm15, ymm8vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]vpaddd ymm9, ymm9, ymm13vpaddd ymm10, ymm10, ymm14vpaddd ymm11, ymm11, ymm15vpxor ymm4, ymm4, ymm8vpxor ymm5, ymm5, ymm9vpxor ymm6, ymm6, 
ymm10vpxor ymm7, ymm7, ymm11vmovdqa ymmword ptr [rsp+0x200], ymm8vpsrld ymm8, ymm4, 12vpslld ymm4, ymm4, 20vpor ymm4, ymm4, ymm8vpsrld ymm8, ymm5, 12vpslld ymm5, ymm5, 20vpor ymm5, ymm5, ymm8vpsrld ymm8, ymm6, 12vpslld ymm6, ymm6, 20vpor ymm6, ymm6, ymm8vpsrld ymm8, ymm7, 12vpslld ymm7, ymm7, 20vpor ymm7, ymm7, ymm8vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1E0]vpaddd ymm1, ymm1, ymmword ptr [rsp]vpaddd ymm2, ymm2, ymmword ptr [rsp+0x120]vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0]vpaddd ymm0, ymm0, ymm4vpaddd ymm1, ymm1, ymm5vpaddd ymm2, ymm2, ymm6vpaddd ymm3, ymm3, ymm7vpxor ymm12, ymm12, ymm0vpxor ymm13, ymm13, ymm1vpxor ymm14, ymm14, ymm2vpxor ymm15, ymm15, ymm3vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]vpshufb ymm12, ymm12, ymm8vpshufb ymm13, ymm13, ymm8vpshufb ymm14, ymm14, ymm8vpshufb ymm15, ymm15, ymm8vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]vpaddd ymm9, ymm9, ymm13vpaddd ymm10, ymm10, ymm14vpaddd ymm11, ymm11, ymm15vpxor ymm4, ymm4, ymm8vpxor ymm5, ymm5, ymm9vpxor ymm6, ymm6, ymm10vpxor ymm7, ymm7, ymm11vmovdqa ymmword ptr [rsp+0x200], ymm8vpsrld ymm8, ymm4, 7vpslld ymm4, ymm4, 25vpor ymm4, ymm4, ymm8vpsrld ymm8, ymm5, 7vpslld ymm5, ymm5, 25vpor ymm5, ymm5, ymm8vpsrld ymm8, ymm6, 7vpslld ymm6, ymm6, 25vpor ymm6, ymm6, ymm8vpsrld ymm8, ymm7, 7vpslld ymm7, ymm7, 25vpor ymm7, ymm7, ymm8vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1C0]vpaddd ymm1, ymm1, ymmword ptr [rsp+0x40]vpaddd ymm2, ymm2, ymmword ptr [rsp+0x60]vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0]vpaddd ymm0, ymm0, ymm5vpaddd ymm1, ymm1, ymm6vpaddd ymm2, ymm2, ymm7vpaddd ymm3, ymm3, ymm4vpxor ymm15, ymm15, ymm0vpxor ymm12, ymm12, ymm1vpxor ymm13, ymm13, ymm2vpxor ymm14, ymm14, ymm3vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]vpshufb ymm15, ymm15, ymm8vpshufb ymm12, ymm12, ymm8vpshufb ymm13, ymm13, ymm8vpshufb ymm14, ymm14, ymm8vpaddd ymm10, ymm10, ymm15vpaddd ymm11, ymm11, ymm12vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]vpaddd ymm9, ymm9, ymm14vpxor ymm5, ymm5, ymm10vpxor ymm6, ymm6, ymm11vpxor ymm7, ymm7, 
ymm8vpxor ymm4, ymm4, ymm9vmovdqa ymmword ptr [rsp+0x200], ymm8vpsrld ymm8, ymm5, 12vpslld ymm5, ymm5, 20vpor ymm5, ymm5, ymm8vpsrld ymm8, ymm6, 12vpslld ymm6, ymm6, 20vpor ymm6, ymm6, ymm8vpsrld ymm8, ymm7, 12vpslld ymm7, ymm7, 20vpor ymm7, ymm7, ymm8vpsrld ymm8, ymm4, 12vpslld ymm4, ymm4, 20vpor ymm4, ymm4, ymm8vpaddd ymm0, ymm0, ymmword ptr [rsp+0x140]vpaddd ymm1, ymm1, ymmword ptr [rsp+0x180]vpaddd ymm2, ymm2, ymmword ptr [rsp+0x80]vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1A0]vpaddd ymm0, ymm0, ymm5vpaddd ymm1, ymm1, ymm6vpaddd ymm2, ymm2, ymm7vpaddd ymm3, ymm3, ymm4vpxor ymm15, ymm15, ymm0vpxor ymm12, ymm12, ymm1vpxor ymm13, ymm13, ymm2vpxor ymm14, ymm14, ymm3vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]vpshufb ymm15, ymm15, ymm8vpshufb ymm12, ymm12, ymm8vpshufb ymm13, ymm13, ymm8vpshufb ymm14, ymm14, ymm8vpaddd ymm10, ymm10, ymm15vpaddd ymm11, ymm11, ymm12vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]vpaddd ymm9, ymm9, ymm14vpxor ymm5, ymm5, ymm10vpxor ymm6, ymm6, ymm11vpxor ymm7, ymm7, ymm8vpxor ymm4, ymm4, ymm9vpxor ymm0, ymm0, ymm8vpxor ymm1, ymm1, ymm9vpxor ymm2, ymm2, ymm10vpxor ymm3, ymm3, ymm11vpsrld ymm8, ymm5, 7vpslld ymm5, ymm5, 25vpor ymm5, ymm5, ymm8vpsrld ymm8, ymm6, 7vpslld ymm6, ymm6, 25vpor ymm6, ymm6, ymm8vpsrld ymm8, ymm7, 7vpslld ymm7, ymm7, 25vpor ymm7, ymm7, ymm8vpsrld ymm8, ymm4, 7vpslld ymm4, ymm4, 25vpor ymm4, ymm4, ymm8vpxor ymm4, ymm4, ymm12vpxor ymm5, ymm5, ymm13vpxor ymm6, ymm6, ymm14vpxor ymm7, ymm7, ymm15movzx eax, byte ptr [rbp+0x38]jne 9bmov rbx, qword ptr [rbp+0x50]vunpcklps ymm8, ymm0, ymm1vunpcklps ymm9, ymm2, ymm3vunpckhps ymm10, ymm0, ymm1vunpcklps ymm11, ymm4, ymm5vunpcklps ymm0, ymm6, ymm7vshufps ymm12, ymm8, ymm9, 78vblendps ymm1, ymm8, ymm12, 0xCCvshufps ymm8, ymm11, ymm0, 78vunpckhps ymm13, ymm2, ymm3vblendps ymm2, ymm11, ymm8, 0xCCvblendps ymm3, ymm12, ymm9, 0xCCvperm2f128 ymm12, ymm1, ymm2, 0x20vmovups ymmword ptr [rbx], ymm12vunpckhps ymm14, ymm4, ymm5vblendps ymm4, ymm8, ymm0, 0xCCvunpckhps ymm15, ymm6, ymm7vperm2f128 
ymm7, ymm3, ymm4, 0x20vmovups ymmword ptr [rbx+0x20], ymm7vshufps ymm5, ymm10, ymm13, 78vblendps ymm6, ymm5, ymm13, 0xCCvshufps ymm13, ymm14, ymm15, 78vblendps ymm10, ymm10, ymm5, 0xCCvblendps ymm14, ymm14, ymm13, 0xCCvperm2f128 ymm8, ymm10, ymm14, 0x20vmovups ymmword ptr [rbx+0x40], ymm8vblendps ymm15, ymm13, ymm15, 0xCCvperm2f128 ymm13, ymm6, ymm15, 0x20vmovups ymmword ptr [rbx+0x60], ymm13vperm2f128 ymm9, ymm1, ymm2, 0x31vperm2f128 ymm11, ymm3, ymm4, 0x31vmovups ymmword ptr [rbx+0x80], ymm9vperm2f128 ymm14, ymm10, ymm14, 0x31vperm2f128 ymm15, ymm6, ymm15, 0x31vmovups ymmword ptr [rbx+0xA0], ymm11vmovups ymmword ptr [rbx+0xC0], ymm14vmovups ymmword ptr [rbx+0xE0], ymm15vmovdqa ymm0, ymmword ptr [rsp+0x220]vpaddd ymm1, ymm0, ymmword ptr [rsp+0x240]vmovdqa ymmword ptr [rsp+0x240], ymm1vpxor ymm0, ymm0, ymmword ptr [CMP_MSB_MASK+rip]vpxor ymm2, ymm1, ymmword ptr [CMP_MSB_MASK+rip]vpcmpgtd ymm2, ymm0, ymm2vmovdqa ymm0, ymmword ptr [rsp+0x260]vpsubd ymm2, ymm0, ymm2vmovdqa ymmword ptr [rsp+0x260], ymm2add rdi, 64add rbx, 256mov qword ptr [rbp+0x50], rbxsub rsi, 8cmp rsi, 8jnc 2btest rsi, rsijnz 3f4:vzerouppermov rsp, rbppop rbppop rbxpop r12pop r13pop r14pop r15ret.p2align 53:mov rbx, qword ptr [rbp+0x50]mov r15, qword ptr [rsp+0x2A0]movzx r13d, byte ptr [rbp+0x38]movzx r12d, byte ptr [rbp+0x48]test rsi, 0x4je 3fvbroadcasti128 ymm0, xmmword ptr [rcx]vbroadcasti128 ymm1, xmmword ptr [rcx+0x10]vmovdqa ymm8, ymm0vmovdqa ymm9, ymm1vbroadcasti128 ymm12, xmmword ptr [rsp+0x240]vbroadcasti128 ymm13, xmmword ptr [rsp+0x260]vpunpckldq ymm14, ymm12, ymm13vpunpckhdq ymm15, ymm12, ymm13vpermq ymm14, ymm14, 0x50vpermq ymm15, ymm15, 0x50vbroadcasti128 ymm12, xmmword ptr [BLAKE3_BLOCK_LEN+rip]vpblendd ymm14, ymm14, ymm12, 0x44vpblendd ymm15, ymm15, ymm12, 0x44vmovdqa ymmword ptr [rsp], ymm14vmovdqa ymmword ptr [rsp+0x20], ymm15mov r8, qword ptr [rdi]mov r9, qword ptr [rdi+0x8]mov r10, qword ptr [rdi+0x10]mov r11, qword ptr [rdi+0x18]movzx eax, byte ptr [rbp+0x40]or eax, r13dxor edx, 
edx.p2align 52:mov r14d, eaxor eax, r12dadd rdx, 64cmp rdx, r15cmovne eax, r14dmov dword ptr [rsp+0x200], eaxvmovups ymm2, ymmword ptr [r8+rdx-0x40]vinsertf128 ymm2, ymm2, xmmword ptr [r9+rdx-0x40], 0x01vmovups ymm3, ymmword ptr [r8+rdx-0x30]vinsertf128 ymm3, ymm3, xmmword ptr [r9+rdx-0x30], 0x01vshufps ymm4, ymm2, ymm3, 136vshufps ymm5, ymm2, ymm3, 221vmovups ymm2, ymmword ptr [r8+rdx-0x20]vinsertf128 ymm2, ymm2, xmmword ptr [r9+rdx-0x20], 0x01vmovups ymm3, ymmword ptr [r8+rdx-0x10]vinsertf128 ymm3, ymm3, xmmword ptr [r9+rdx-0x10], 0x01vshufps ymm6, ymm2, ymm3, 136vshufps ymm7, ymm2, ymm3, 221vpshufd ymm6, ymm6, 0x93vpshufd ymm7, ymm7, 0x93vmovups ymm10, ymmword ptr [r10+rdx-0x40]vinsertf128 ymm10, ymm10, xmmword ptr [r11+rdx-0x40], 0x01vmovups ymm11, ymmword ptr [r10+rdx-0x30]vinsertf128 ymm11, ymm11, xmmword ptr [r11+rdx-0x30], 0x01vshufps ymm12, ymm10, ymm11, 136vshufps ymm13, ymm10, ymm11, 221vmovups ymm10, ymmword ptr [r10+rdx-0x20]vinsertf128 ymm10, ymm10, xmmword ptr [r11+rdx-0x20], 0x01vmovups ymm11, ymmword ptr [r10+rdx-0x10]vinsertf128 ymm11, ymm11, xmmword ptr [r11+rdx-0x10], 0x01vshufps ymm14, ymm10, ymm11, 136vshufps ymm15, ymm10, ymm11, 221vpshufd ymm14, ymm14, 0x93vpshufd ymm15, ymm15, 0x93prefetcht0 [r8+rdx+0x80]prefetcht0 [r9+rdx+0x80]prefetcht0 [r10+rdx+0x80]prefetcht0 [r11+rdx+0x80]vpbroadcastd ymm2, dword ptr [rsp+0x200]vmovdqa ymm3, ymmword ptr [rsp]vmovdqa ymm11, ymmword ptr [rsp+0x20]vpblendd ymm3, ymm3, ymm2, 0x88vpblendd ymm11, ymm11, ymm2, 0x88vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV+rip]vmovdqa ymm10, ymm2mov al, 79:vpaddd ymm0, ymm0, ymm4vpaddd ymm8, ymm8, ymm12vmovdqa ymmword ptr [rsp+0x40], ymm4nopvmovdqa ymmword ptr [rsp+0x60], ymm12nopvpaddd ymm0, ymm0, ymm1vpaddd ymm8, ymm8, ymm9vpxor ymm3, ymm3, ymm0vpxor ymm11, ymm11, ymm8vbroadcasti128 ymm4, xmmword ptr [ROT16+rip]vpshufb ymm3, ymm3, ymm4vpshufb ymm11, ymm11, ymm4vpaddd ymm2, ymm2, ymm3vpaddd ymm10, ymm10, ymm11vpxor ymm1, ymm1, ymm2vpxor ymm9, ymm9, ymm10vpsrld ymm4, ymm1, 
12vpslld ymm1, ymm1, 20vpor ymm1, ymm1, ymm4vpsrld ymm4, ymm9, 12vpslld ymm9, ymm9, 20vpor ymm9, ymm9, ymm4vpaddd ymm0, ymm0, ymm5vpaddd ymm8, ymm8, ymm13vpaddd ymm0, ymm0, ymm1vpaddd ymm8, ymm8, ymm9vmovdqa ymmword ptr [rsp+0x80], ymm5vmovdqa ymmword ptr [rsp+0xA0], ymm13vpxor ymm3, ymm3, ymm0vpxor ymm11, ymm11, ymm8vbroadcasti128 ymm4, xmmword ptr [ROT8+rip]vpshufb ymm3, ymm3, ymm4vpshufb ymm11, ymm11, ymm4vpaddd ymm2, ymm2, ymm3vpaddd ymm10, ymm10, ymm11vpxor ymm1, ymm1, ymm2vpxor ymm9, ymm9, ymm10vpsrld ymm4, ymm1, 7vpslld ymm1, ymm1, 25vpor ymm1, ymm1, ymm4vpsrld ymm4, ymm9, 7vpslld ymm9, ymm9, 25vpor ymm9, ymm9, ymm4vpshufd ymm0, ymm0, 0x93vpshufd ymm8, ymm8, 0x93vpshufd ymm3, ymm3, 0x4Evpshufd ymm11, ymm11, 0x4Evpshufd ymm2, ymm2, 0x39vpshufd ymm10, ymm10, 0x39vpaddd ymm0, ymm0, ymm6vpaddd ymm8, ymm8, ymm14vpaddd ymm0, ymm0, ymm1vpaddd ymm8, ymm8, ymm9vpxor ymm3, ymm3, ymm0vpxor ymm11, ymm11, ymm8vbroadcasti128 ymm4, xmmword ptr [ROT16+rip]vpshufb ymm3, ymm3, ymm4vpshufb ymm11, ymm11, ymm4vpaddd ymm2, ymm2, ymm3vpaddd ymm10, ymm10, ymm11vpxor ymm1, ymm1, ymm2vpxor ymm9, ymm9, ymm10vpsrld ymm4, ymm1, 12vpslld ymm1, ymm1, 20vpor ymm1, ymm1, ymm4vpsrld ymm4, ymm9, 12vpslld ymm9, ymm9, 20vpor ymm9, ymm9, ymm4vpaddd ymm0, ymm0, ymm7vpaddd ymm8, ymm8, ymm15vpaddd ymm0, ymm0, ymm1vpaddd ymm8, ymm8, ymm9vpxor ymm3, ymm3, ymm0vpxor ymm11, ymm11, ymm8vbroadcasti128 ymm4, xmmword ptr [ROT8+rip]vpshufb ymm3, ymm3, ymm4vpshufb ymm11, ymm11, ymm4vpaddd ymm2, ymm2, ymm3vpaddd ymm10, ymm10, ymm11vpxor ymm1, ymm1, ymm2vpxor ymm9, ymm9, ymm10vpsrld ymm4, ymm1, 7vpslld ymm1, ymm1, 25vpor ymm1, ymm1, ymm4vpsrld ymm4, ymm9, 7vpslld ymm9, ymm9, 25vpor ymm9, ymm9, ymm4vpshufd ymm0, ymm0, 0x39vpshufd ymm8, ymm8, 0x39vpshufd ymm3, ymm3, 0x4Evpshufd ymm11, ymm11, 0x4Evpshufd ymm2, ymm2, 0x93vpshufd ymm10, ymm10, 0x93dec alje 9fvmovdqa ymm4, ymmword ptr [rsp+0x40]vmovdqa ymm5, ymmword ptr [rsp+0x80]vshufps ymm12, ymm4, ymm5, 214vpshufd ymm13, ymm4, 0x0Fvpshufd ymm4, ymm12, 0x39vshufps 
ymm12, ymm6, ymm7, 250vpblendd ymm13, ymm13, ymm12, 0xAAvpunpcklqdq ymm12, ymm7, ymm5vpblendd ymm12, ymm12, ymm6, 0x88vpshufd ymm12, ymm12, 0x78vpunpckhdq ymm5, ymm5, ymm7vpunpckldq ymm6, ymm6, ymm5vpshufd ymm7, ymm6, 0x1Evmovdqa ymmword ptr [rsp+0x40], ymm13vmovdqa ymmword ptr [rsp+0x80], ymm12vmovdqa ymm12, ymmword ptr [rsp+0x60]vmovdqa ymm13, ymmword ptr [rsp+0xA0]vshufps ymm5, ymm12, ymm13, 214vpshufd ymm6, ymm12, 0x0Fvpshufd ymm12, ymm5, 0x39vshufps ymm5, ymm14, ymm15, 250vpblendd ymm6, ymm6, ymm5, 0xAAvpunpcklqdq ymm5, ymm15, ymm13vpblendd ymm5, ymm5, ymm14, 0x88vpshufd ymm5, ymm5, 0x78vpunpckhdq ymm13, ymm13, ymm15vpunpckldq ymm14, ymm14, ymm13vpshufd ymm15, ymm14, 0x1Evmovdqa ymm13, ymm6vmovdqa ymm14, ymm5vmovdqa ymm5, ymmword ptr [rsp+0x40]vmovdqa ymm6, ymmword ptr [rsp+0x80]jmp 9b9:vpxor ymm0, ymm0, ymm2vpxor ymm1, ymm1, ymm3vpxor ymm8, ymm8, ymm10vpxor ymm9, ymm9, ymm11mov eax, r13dcmp rdx, r15jne 2bvmovdqu xmmword ptr [rbx], xmm0vmovdqu xmmword ptr [rbx+0x10], xmm1vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01vmovdqu xmmword ptr [rbx+0x40], xmm8vmovdqu xmmword ptr [rbx+0x50], xmm9vextracti128 xmmword ptr [rbx+0x60], ymm8, 0x01vextracti128 xmmword ptr [rbx+0x70], ymm9, 0x01vmovaps xmm8, xmmword ptr [rsp+0x280]vmovaps xmm0, xmmword ptr [rsp+0x240]vmovaps xmm1, xmmword ptr [rsp+0x250]vmovaps xmm2, xmmword ptr [rsp+0x260]vmovaps xmm3, xmmword ptr [rsp+0x270]vblendvps xmm0, xmm0, xmm1, xmm8vblendvps xmm2, xmm2, xmm3, xmm8vmovaps xmmword ptr [rsp+0x240], xmm0vmovaps xmmword ptr [rsp+0x260], xmm2add rbx, 128add rdi, 32sub rsi, 43:test rsi, 0x2je 3fvbroadcasti128 ymm0, xmmword ptr [rcx]vbroadcasti128 ymm1, xmmword ptr [rcx+0x10]vmovd xmm13, dword ptr [rsp+0x240]vpinsrd xmm13, xmm13, dword ptr [rsp+0x260], 1vpinsrd xmm13, xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2vmovd xmm14, dword ptr [rsp+0x244]vpinsrd xmm14, xmm14, dword ptr [rsp+0x264], 1vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2vinserti128 
ymm13, ymm13, xmm14, 0x01vbroadcasti128 ymm14, xmmword ptr [ROT16+rip]vbroadcasti128 ymm15, xmmword ptr [ROT8+rip]mov r8, qword ptr [rdi]mov r9, qword ptr [rdi+0x8]movzx eax, byte ptr [rbp+0x40]or eax, r13dxor edx, edx.p2align 52:mov r14d, eaxor eax, r12dadd rdx, 64cmp rdx, r15cmovne eax, r14dmov dword ptr [rsp+0x200], eaxvbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV+rip]vpbroadcastd ymm8, dword ptr [rsp+0x200]vpblendd ymm3, ymm13, ymm8, 0x88vmovups ymm8, ymmword ptr [r8+rdx-0x40]vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x40], 0x01vmovups ymm9, ymmword ptr [r8+rdx-0x30]vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x30], 0x01vshufps ymm4, ymm8, ymm9, 136vshufps ymm5, ymm8, ymm9, 221vmovups ymm8, ymmword ptr [r8+rdx-0x20]vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x20], 0x01vmovups ymm9, ymmword ptr [r8+rdx-0x10]vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x10], 0x01vshufps ymm6, ymm8, ymm9, 136vshufps ymm7, ymm8, ymm9, 221vpshufd ymm6, ymm6, 0x93vpshufd ymm7, ymm7, 0x93mov al, 79:vpaddd ymm0, ymm0, ymm4vpaddd ymm0, ymm0, ymm1vpxor ymm3, ymm3, ymm0vpshufb ymm3, ymm3, ymm14vpaddd ymm2, ymm2, ymm3vpxor ymm1, ymm1, ymm2vpsrld ymm8, ymm1, 12vpslld ymm1, ymm1, 20vpor ymm1, ymm1, ymm8vpaddd ymm0, ymm0, ymm5vpaddd ymm0, ymm0, ymm1vpxor ymm3, ymm3, ymm0vpshufb ymm3, ymm3, ymm15vpaddd ymm2, ymm2, ymm3vpxor ymm1, ymm1, ymm2vpsrld ymm8, ymm1, 7vpslld ymm1, ymm1, 25vpor ymm1, ymm1, ymm8vpshufd ymm0, ymm0, 0x93vpshufd ymm3, ymm3, 0x4Evpshufd ymm2, ymm2, 0x39vpaddd ymm0, ymm0, ymm6vpaddd ymm0, ymm0, ymm1vpxor ymm3, ymm3, ymm0vpshufb ymm3, ymm3, ymm14vpaddd ymm2, ymm2, ymm3vpxor ymm1, ymm1, ymm2vpsrld ymm8, ymm1, 12vpslld ymm1, ymm1, 20vpor ymm1, ymm1, ymm8vpaddd ymm0, ymm0, ymm7vpaddd ymm0, ymm0, ymm1vpxor ymm3, ymm3, ymm0vpshufb ymm3, ymm3, ymm15vpaddd ymm2, ymm2, ymm3vpxor ymm1, ymm1, ymm2vpsrld ymm8, ymm1, 7vpslld ymm1, ymm1, 25vpor ymm1, ymm1, ymm8vpshufd ymm0, ymm0, 0x39vpshufd ymm3, ymm3, 0x4Evpshufd ymm2, ymm2, 0x93dec aljz 9fvshufps ymm8, ymm4, ymm5, 214vpshufd ymm9, 
ymm4, 0x0Fvpshufd ymm4, ymm8, 0x39vshufps ymm8, ymm6, ymm7, 250vpblendd ymm9, ymm9, ymm8, 0xAAvpunpcklqdq ymm8, ymm7, ymm5vpblendd ymm8, ymm8, ymm6, 0x88vpshufd ymm8, ymm8, 0x78vpunpckhdq ymm5, ymm5, ymm7vpunpckldq ymm6, ymm6, ymm5vpshufd ymm7, ymm6, 0x1Evmovdqa ymm5, ymm9vmovdqa ymm6, ymm8jmp 9b9:vpxor ymm0, ymm0, ymm2vpxor ymm1, ymm1, ymm3mov eax, r13dcmp rdx, r15jne 2bvmovdqu xmmword ptr [rbx], xmm0vmovdqu xmmword ptr [rbx+0x10], xmm1vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01vmovaps ymm8, ymmword ptr [rsp+0x280]vmovaps ymm0, ymmword ptr [rsp+0x240]vmovups ymm1, ymmword ptr [rsp+0x248]vmovaps ymm2, ymmword ptr [rsp+0x260]vmovups ymm3, ymmword ptr [rsp+0x268]vblendvps ymm0, ymm0, ymm1, ymm8vblendvps ymm2, ymm2, ymm3, ymm8vmovaps ymmword ptr [rsp+0x240], ymm0vmovaps ymmword ptr [rsp+0x260], ymm2add rbx, 64add rdi, 16sub rsi, 23:test rsi, 0x1je 4bvmovdqu xmm0, xmmword ptr [rcx]vmovdqu xmm1, xmmword ptr [rcx+0x10]vmovd xmm3, dword ptr [rsp+0x240]vpinsrd xmm3, xmm3, dword ptr [rsp+0x260], 1vpinsrd xmm13, xmm3, dword ptr [BLAKE3_BLOCK_LEN+rip], 2vmovdqa xmm14, xmmword ptr [ROT16+rip]vmovdqa xmm15, xmmword ptr [ROT8+rip]mov r8, qword ptr [rdi]movzx eax, byte ptr [rbp+0x40]or eax, r13dxor edx, edx.p2align 52:mov r14d, eaxor eax, r12dadd rdx, 64cmp rdx, r15cmovne eax, r14dvmovdqa xmm2, xmmword ptr [BLAKE3_IV+rip]vmovdqa xmm3, xmm13vpinsrd xmm3, xmm3, eax, 3vmovups xmm8, xmmword ptr [r8+rdx-0x40]vmovups xmm9, xmmword ptr [r8+rdx-0x30]vshufps xmm4, xmm8, xmm9, 136vshufps xmm5, xmm8, xmm9, 221vmovups xmm8, xmmword ptr [r8+rdx-0x20]vmovups xmm9, xmmword ptr [r8+rdx-0x10]vshufps xmm6, xmm8, xmm9, 136vshufps xmm7, xmm8, xmm9, 221vpshufd xmm6, xmm6, 0x93vpshufd xmm7, xmm7, 0x93mov al, 79:vpaddd xmm0, xmm0, xmm4vpaddd xmm0, xmm0, xmm1vpxor xmm3, xmm3, xmm0vpshufb xmm3, xmm3, xmm14vpaddd xmm2, xmm2, xmm3vpxor xmm1, xmm1, xmm2vpsrld xmm8, xmm1, 12vpslld xmm1, xmm1, 20vpor xmm1, xmm1, xmm8vpaddd xmm0, xmm0, xmm5vpaddd xmm0, xmm0, 
xmm1vpxor xmm3, xmm3, xmm0vpshufb xmm3, xmm3, xmm15vpaddd xmm2, xmm2, xmm3vpxor xmm1, xmm1, xmm2vpsrld xmm8, xmm1, 7vpslld xmm1, xmm1, 25vpor xmm1, xmm1, xmm8vpshufd xmm0, xmm0, 0x93vpshufd xmm3, xmm3, 0x4Evpshufd xmm2, xmm2, 0x39vpaddd xmm0, xmm0, xmm6vpaddd xmm0, xmm0, xmm1vpxor xmm3, xmm3, xmm0vpshufb xmm3, xmm3, xmm14vpaddd xmm2, xmm2, xmm3vpxor xmm1, xmm1, xmm2vpsrld xmm8, xmm1, 12vpslld xmm1, xmm1, 20vpor xmm1, xmm1, xmm8vpaddd xmm0, xmm0, xmm7vpaddd xmm0, xmm0, xmm1vpxor xmm3, xmm3, xmm0vpshufb xmm3, xmm3, xmm15vpaddd xmm2, xmm2, xmm3vpxor xmm1, xmm1, xmm2vpsrld xmm8, xmm1, 7vpslld xmm1, xmm1, 25vpor xmm1, xmm1, xmm8vpshufd xmm0, xmm0, 0x39vpshufd xmm3, xmm3, 0x4Evpshufd xmm2, xmm2, 0x93dec aljz 9fvshufps xmm8, xmm4, xmm5, 214vpshufd xmm9, xmm4, 0x0Fvpshufd xmm4, xmm8, 0x39vshufps xmm8, xmm6, xmm7, 250vpblendd xmm9, xmm9, xmm8, 0xAAvpunpcklqdq xmm8, xmm7, xmm5vpblendd xmm8, xmm8, xmm6, 0x88vpshufd xmm8, xmm8, 0x78vpunpckhdq xmm5, xmm5, xmm7vpunpckldq xmm6, xmm6, xmm5vpshufd xmm7, xmm6, 0x1Evmovdqa xmm5, xmm9vmovdqa xmm6, xmm8jmp 9b9:vpxor xmm0, xmm0, xmm2vpxor xmm1, xmm1, xmm3mov eax, r13dcmp rdx, r15jne 2bvmovdqu xmmword ptr [rbx], xmm0vmovdqu xmmword ptr [rbx+0x10], xmm1jmp 4b#ifdef __APPLE__.static_data#else.section .rodata#endif.p2align 6ADD0:.long 0, 1, 2, 3, 4, 5, 6, 7ADD1:.long 8, 8, 8, 8, 8, 8, 8, 8BLAKE3_IV_0:.long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667.long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667BLAKE3_IV_1:.long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85.long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85BLAKE3_IV_2:.long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372.long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372BLAKE3_IV_3:.long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A.long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53ABLAKE3_BLOCK_LEN:.long 0x00000040, 0x00000040, 0x00000040, 0x00000040.long 0x00000040, 0x00000040, 0x00000040, 0x00000040ROT16:.byte 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13ROT8:.byte 1, 
2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12CMP_MSB_MASK:.long 0x80000000, 0x80000000, 0x80000000, 0x80000000.long 0x80000000, 0x80000000, 0x80000000, 0x80000000BLAKE3_IV:.long 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A
# BLAKE3 x86-64 Unix assembly sources, one file per SIMD level named in the
# file name (AVX2, AVX-512, SSE2, SSE4.1).
# Each assignment must sit on its own line: when they are fused onto one line,
# make reads a single `ASM = ...` whose value contains the literal words
# "+=" and "...unix.SASM" instead of four source files.
# NOTE(review): how ASM is consumed (compile/link rule) is not visible here.
ASM = blake3_avx2_x86-64_unix.S
ASM += blake3_avx512_x86-64_unix.S
ASM += blake3_sse2_x86-64_unix.S
ASM += blake3_sse41_x86-64_unix.S
# BLAKE3 x86-64 Unix assembly sources under vendor/blake3/, one file per SIMD
# level named in the file name (AVX2, AVX-512, SSE2, SSE4.1).
# Each assignment must sit on its own line: when they are fused onto one line,
# make reads a single `ASM = ...` whose value contains the literal words
# "+=" and "...unix.SASM" instead of four source files.
# NOTE(review): the plain `=` below replaces any value ASM was given earlier in
# the file; confirm that only this vendor/blake3/ list is meant to remain.
ASM = vendor/blake3/blake3_avx2_x86-64_unix.S
ASM += vendor/blake3/blake3_avx512_x86-64_unix.S
ASM += vendor/blake3/blake3_sse2_x86-64_unix.S
ASM += vendor/blake3/blake3_sse41_x86-64_unix.S