Skip to content

Commit 2bac836

Browse files
committed
add percentage to md5 and sha1, optimized the aforementioned
1 parent 5288919 commit 2bac836

3 files changed

Lines changed: 283 additions & 189 deletions

File tree

app/hasher.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,9 +239,12 @@ def _calculate_file_subprocess(self,
239239
if not os.path.exists(executable_path):
240240
raise FileNotFoundError(f"Executable not found: {executable_name}")
241241

242+
# Get file size
243+
file_size = os.path.getsize(file_path)
244+
242245
# Launch C++ process
243246
proc = subprocess.Popen(
244-
[executable_path],
247+
[executable_path, str(file_size)],
245248
stdin=subprocess.PIPE,
246249
stdout=subprocess.PIPE,
247250
stderr=subprocess.PIPE,

src/Md5.cpp

Lines changed: 157 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,13 @@
22
#include <vector>
33
#include <string>
44
#include <cstring>
5-
#include <cmath>
65
#include <iomanip>
6+
#include <cstdint>
77
#include "common.h"
8-
#include "sha.h" // Reusing helper functions if applicable, or defining new ones
98

109
using namespace std;
1110

12-
// MD5 Constants
11+
// Constants for MD5 transform
1312
const uint32_t S[64] = {
1413
7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22,
1514
5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20,
@@ -18,134 +17,193 @@ const uint32_t S[64] = {
1817
};
1918

2019
// Per-step additive constants for the MD5 transform, indexed by step number
// (0..63). Laid out four per line and grouped by round, matching the order in
// which transform() consumes them.
const uint32_t K[64] = {
    // Steps 0-15 (round using F)
    0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
    0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
    0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
    0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
    // Steps 16-31 (round using G)
    0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
    0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
    0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
    0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
    // Steps 32-47 (round using H)
    0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
    0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
    0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
    0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
    // Steps 48-63 (round using I)
    0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
    0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
    0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
    0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
};
3829

39-
// Left rotate function
30+
// Bitwise rotation
4031
// Rotates the 32-bit word `x` left by `c` bit positions and returns the result.
// The count is reduced modulo 32 so that c == 0 (and c >= 32) never produces a
// shift by the full word width, which is undefined behaviour in C++.
inline uint32_t leftRotate(uint32_t x, uint32_t c) {
    c &= 31u;
    // (32 - c) & 31 maps c == 0 to a right shift of 0 instead of 32.
    return (x << c) | (x >> ((32u - c) & 31u));
}
4334

44-
// MD5 functions
35+
// MD5 basic functions
4536
// Bitwise select: each result bit is taken from y where x has a 1, else from z.
inline uint32_t F(uint32_t x, uint32_t y, uint32_t z) {
    return z ^ (x & (y ^ z));
}
4637
// Bitwise select: each result bit is taken from x where z has a 1, else from y.
inline uint32_t G(uint32_t x, uint32_t y, uint32_t z) {
    return y ^ (z & (x ^ y));
}
4738
// Bitwise parity: XOR of the three input words.
inline uint32_t H(uint32_t x, uint32_t y, uint32_t z) {
    const uint32_t xy = x ^ y;
    return xy ^ z;
}
4839
// XORs y with (x OR NOT z).
inline uint32_t I(uint32_t x, uint32_t y, uint32_t z) {
    const uint32_t xOrNotZ = x | ~z;
    return xOrNotZ ^ y;
}
4940

50-
void md5(const string& message) {
51-
// Initialize variables
41+
// Process a single 64-byte block
42+
// Runs the MD5 compression function over one 64-byte block.
//
// block          : pointer to exactly 64 readable bytes.
// a0, b0, c0, d0 : running hash state; the block's contribution is added into
//                  them in place.
//
// Relies on the file-level K table, leftRotate() and the F/G/H/I functions.
void transform(const uint8_t* block, uint32_t& a0, uint32_t& b0, uint32_t& c0, uint32_t& d0) {
    // Decode 64 bytes into 16 little-endian words. Each byte is widened to
    // uint32_t *before* shifting: a plain uint8_t promotes to int, and shifting
    // a value >= 0x80 left by 24 would overflow a signed int.
    uint32_t M[16];
    for (int j = 0; j < 16; ++j) {
        const uint8_t* p = block + j * 4;
        M[j] = static_cast<uint32_t>(p[0]) |
               (static_cast<uint32_t>(p[1]) << 8) |
               (static_cast<uint32_t>(p[2]) << 16) |
               (static_cast<uint32_t>(p[3]) << 24);
    }

    uint32_t A = a0;
    uint32_t B = b0;
    uint32_t C = c0;
    uint32_t D = d0;

    // One MD5 step: fn is the round function, k the message-word index,
    // s the rotate amount and i the index into K. Kept as a macro so the
    // unrolled steps can permute A/B/C/D by name; wrapped in do/while so it
    // behaves as a single statement, and undefined again below.
#define MD5_STEP(fn, a, b, c, d, k, s, i)                 \
    do {                                                  \
        (a) += fn((b), (c), (d)) + M[(k)] + K[(i)];       \
        (a) = (b) + leftRotate((a), (s));                 \
    } while (0)

    // Round 1 (F): message words are used in order, k == i.
    MD5_STEP(F, A, B, C, D,  0,  7,  0); MD5_STEP(F, D, A, B, C,  1, 12,  1);
    MD5_STEP(F, C, D, A, B,  2, 17,  2); MD5_STEP(F, B, C, D, A,  3, 22,  3);
    MD5_STEP(F, A, B, C, D,  4,  7,  4); MD5_STEP(F, D, A, B, C,  5, 12,  5);
    MD5_STEP(F, C, D, A, B,  6, 17,  6); MD5_STEP(F, B, C, D, A,  7, 22,  7);
    MD5_STEP(F, A, B, C, D,  8,  7,  8); MD5_STEP(F, D, A, B, C,  9, 12,  9);
    MD5_STEP(F, C, D, A, B, 10, 17, 10); MD5_STEP(F, B, C, D, A, 11, 22, 11);
    MD5_STEP(F, A, B, C, D, 12,  7, 12); MD5_STEP(F, D, A, B, C, 13, 12, 13);
    MD5_STEP(F, C, D, A, B, 14, 17, 14); MD5_STEP(F, B, C, D, A, 15, 22, 15);

    // Round 2 (G)
    MD5_STEP(G, A, B, C, D,  1,  5, 16); MD5_STEP(G, D, A, B, C,  6,  9, 17);
    MD5_STEP(G, C, D, A, B, 11, 14, 18); MD5_STEP(G, B, C, D, A,  0, 20, 19);
    MD5_STEP(G, A, B, C, D,  5,  5, 20); MD5_STEP(G, D, A, B, C, 10,  9, 21);
    MD5_STEP(G, C, D, A, B, 15, 14, 22); MD5_STEP(G, B, C, D, A,  4, 20, 23);
    MD5_STEP(G, A, B, C, D,  9,  5, 24); MD5_STEP(G, D, A, B, C, 14,  9, 25);
    MD5_STEP(G, C, D, A, B,  3, 14, 26); MD5_STEP(G, B, C, D, A,  8, 20, 27);
    MD5_STEP(G, A, B, C, D, 13,  5, 28); MD5_STEP(G, D, A, B, C,  2,  9, 29);
    MD5_STEP(G, C, D, A, B,  7, 14, 30); MD5_STEP(G, B, C, D, A, 12, 20, 31);

    // Round 3 (H)
    MD5_STEP(H, A, B, C, D,  5,  4, 32); MD5_STEP(H, D, A, B, C,  8, 11, 33);
    MD5_STEP(H, C, D, A, B, 11, 16, 34); MD5_STEP(H, B, C, D, A, 14, 23, 35);
    MD5_STEP(H, A, B, C, D,  1,  4, 36); MD5_STEP(H, D, A, B, C,  4, 11, 37);
    MD5_STEP(H, C, D, A, B,  7, 16, 38); MD5_STEP(H, B, C, D, A, 10, 23, 39);
    MD5_STEP(H, A, B, C, D, 13,  4, 40); MD5_STEP(H, D, A, B, C,  0, 11, 41);
    MD5_STEP(H, C, D, A, B,  3, 16, 42); MD5_STEP(H, B, C, D, A,  6, 23, 43);
    MD5_STEP(H, A, B, C, D,  9,  4, 44); MD5_STEP(H, D, A, B, C, 12, 11, 45);
    MD5_STEP(H, C, D, A, B, 15, 16, 46); MD5_STEP(H, B, C, D, A,  2, 23, 47);

    // Round 4 (I)
    MD5_STEP(I, A, B, C, D,  0,  6, 48); MD5_STEP(I, D, A, B, C,  7, 10, 49);
    MD5_STEP(I, C, D, A, B, 14, 15, 50); MD5_STEP(I, B, C, D, A,  5, 21, 51);
    MD5_STEP(I, A, B, C, D, 12,  6, 52); MD5_STEP(I, D, A, B, C,  3, 10, 53);
    MD5_STEP(I, C, D, A, B, 10, 15, 54); MD5_STEP(I, B, C, D, A,  1, 21, 55);
    MD5_STEP(I, A, B, C, D,  8,  6, 56); MD5_STEP(I, D, A, B, C, 15, 10, 57);
    MD5_STEP(I, C, D, A, B,  6, 15, 58); MD5_STEP(I, B, C, D, A, 13, 21, 59);
    MD5_STEP(I, A, B, C, D,  4,  6, 60); MD5_STEP(I, D, A, B, C, 11, 10, 61);
    MD5_STEP(I, C, D, A, B,  2, 15, 62); MD5_STEP(I, B, C, D, A,  9, 21, 63);

#undef MD5_STEP

    // Fold this block's result into the running state.
    a0 += A;
    b0 += B;
    c0 += C;
    d0 += D;
}
95+
96+
int main(int argc, char* argv[]) {
97+
initBinaryMode();
98+
99+
// Check for file size argument
100+
size_t totalExpectedSize = 0;
101+
if (argc > 1) {
102+
try {
103+
totalExpectedSize = std::stoull(argv[1]);
104+
} catch (...) {
105+
totalExpectedSize = 0;
106+
}
107+
}
108+
109+
// State variables
52110
uint32_t a0 = 0x67452301;
53111
uint32_t b0 = 0xefcdab89;
54112
uint32_t c0 = 0x98badcfe;
55113
uint32_t d0 = 0x10325476;
56-
57-
// Pre-processing: Padding
58-
uint64_t originalLengthBits = message.length() * 8;
59114

60-
// Calculate padding size
61-
size_t messageLen = message.length();
62-
size_t paddingLen = (messageLen % 64 < 56) ? (56 - messageLen % 64) : (120 - messageLen % 64);
63-
size_t totalLen = messageLen + paddingLen + 8;
115+
uint64_t totalBytes = 0;
116+
uint8_t buffer[64];
64117

65-
// Reserve space to avoid reallocations
66-
vector<uint8_t> paddedMessage;
67-
paddedMessage.reserve(totalLen);
118+
// Report initial progress
119+
if (totalExpectedSize > 0) reportProgress(0, totalExpectedSize);
68120

69-
// Copy message
70-
paddedMessage.insert(paddedMessage.end(), message.begin(), message.end());
121+
// Read from stdin in 64-byte chunks
122+
while (cin.read((char*)buffer, 64)) {
123+
totalBytes += 64;
124+
transform(buffer, a0, b0, c0, d0);
125+
126+
// Report progress periodically
127+
if (totalExpectedSize > 0) {
128+
reportProgress(totalBytes, totalExpectedSize);
129+
}
130+
}
71131

72-
// Append '1' bit (0x80 byte)
73-
paddedMessage.push_back(0x80);
132+
// Handle remaining bytes
133+
size_t bytesRead = cin.gcount();
134+
totalBytes += bytesRead;
74135

75-
// Append zeros
76-
paddedMessage.insert(paddedMessage.end(), paddingLen - 1, 0x00);
77-
78-
// Append length (64 bits, little-endian)
79-
for (int i = 0; i < 8; ++i) {
80-
paddedMessage.push_back((originalLengthBits >> (i * 8)) & 0xFF);
136+
// Padding
137+
uint8_t padding[128]; // Max padding needed is 64 + 8 = 72 bytes, but we might cross block boundary
138+
memset(padding, 0, 128);
139+
140+
// Copy remaining bytes to padding buffer
141+
memcpy(padding, buffer, bytesRead);
142+
143+
// Add '1' bit
144+
padding[bytesRead] = 0x80;
145+
146+
size_t paddingLen;
147+
if (bytesRead < 56) {
148+
paddingLen = 56 - bytesRead;
149+
} else {
150+
paddingLen = 120 - bytesRead;
81151
}
82-
83-
// Process message in 512-bit chunks
84-
for (size_t i = 0; i < paddedMessage.size(); i += 64) {
85-
uint32_t M[16];
86-
for (int j = 0; j < 16; ++j) {
87-
M[j] = paddedMessage[i + j * 4] |
88-
(paddedMessage[i + j * 4 + 1] << 8) |
89-
(paddedMessage[i + j * 4 + 2] << 16) |
90-
(paddedMessage[i + j * 4 + 3] << 24);
152+
153+
// Add length (bits) at the end of the last block
154+
uint64_t totalBits = totalBytes * 8;
155+
size_t lengthOffset = bytesRead + paddingLen + 8 - 8; // Position for length
156+
157+
// If we crossed a block boundary, we process the first block
158+
if (bytesRead >= 56) {
159+
transform(padding, a0, b0, c0, d0);
160+
// Move to next block for length
161+
lengthOffset = 64 - 8; // End of second block
162+
// We need to put length at the end of the SECOND block (index 56-63 relative to second block start)
163+
// But wait, my padding logic above is slightly complex. Let's simplify.
164+
}
165+
166+
// Let's redo padding logic to be cleaner
167+
// We have 'bytesRead' bytes in 'buffer'.
168+
// We copy them to a temp buffer that can hold up to 2 blocks (128 bytes)
169+
uint8_t finalBlock[128];
170+
memset(finalBlock, 0, 128);
171+
memcpy(finalBlock, buffer, bytesRead);
172+
173+
finalBlock[bytesRead] = 0x80;
174+
175+
if (bytesRead < 56) {
176+
// Fits in one block
177+
// Append length at bytes 56-63
178+
for (int i = 0; i < 8; ++i) {
179+
finalBlock[56 + i] = (totalBits >> (i * 8)) & 0xFF;
91180
}
92-
93-
uint32_t A = a0;
94-
uint32_t B = b0;
95-
uint32_t C = c0;
96-
uint32_t D = d0;
97-
98-
for (int j = 0; j < 64; ++j) {
99-
uint32_t f, g;
100-
if (j < 16) {
101-
f = F(B, C, D);
102-
g = j;
103-
} else if (j < 32) {
104-
f = G(B, C, D);
105-
g = (5 * j + 1) % 16;
106-
} else if (j < 48) {
107-
f = H(B, C, D);
108-
g = (3 * j + 5) % 16;
109-
} else {
110-
f = I(B, C, D);
111-
g = (7 * j) % 16;
112-
}
113-
114-
uint32_t temp = D;
115-
D = C;
116-
C = B;
117-
B = B + leftRotate(A + f + K[j] + M[g], S[j]);
118-
A = temp;
181+
transform(finalBlock, a0, b0, c0, d0);
182+
} else {
183+
// Need two blocks
184+
// First block is padded with 0s after 0x80
185+
transform(finalBlock, a0, b0, c0, d0);
186+
187+
// Second block has length at end
188+
memset(finalBlock, 0, 64); // Clear first block content
189+
// Length at bytes 56-63 of second block (which is now at index 56 of finalBlock array if we reused it,
190+
// but we just cleared it so it's effectively index 56)
191+
for (int i = 0; i < 8; ++i) {
192+
finalBlock[56 + i] = (totalBits >> (i * 8)) & 0xFF;
119193
}
120-
121-
a0 += A;
122-
b0 += B;
123-
c0 += C;
124-
d0 += D;
194+
transform(finalBlock, a0, b0, c0, d0);
125195
}
126196

127-
// Output hash (little-endian)
197+
// Output
128198
uint32_t result[4] = {a0, b0, c0, d0};
129199
for (int i = 0; i < 4; ++i) {
130-
// We need to print bytes in little-endian order for each 32-bit word
131200
uint8_t* bytes = (uint8_t*)&result[i];
132201
for (int j = 0; j < 4; ++j) {
133202
cout << hex << setfill('0') << setw(2) << (int)bytes[j];
134203
}
135204
}
136-
cout.flush(); // Ensure output is flushed immediately
137-
}
138-
139-
int main() {
140-
initBinaryMode();
141-
142-
string input = readStdinToString();
143-
size_t totalSize = input.length();
144-
145-
reportProgress(0, totalSize);
146-
md5(input);
147-
reportProgress(totalSize, totalSize);
148-
205+
cout.flush();
149206
cout << endl;
207+
150208
return 0;
151209
}

0 commit comments

Comments
 (0)