Skip to content

Commit 0abd079

Browse files
committed
Complete implementation jsoncons
1 parent 8fe000c commit 0abd079

File tree

4 files changed

+205
-51
lines changed

4 files changed

+205
-51
lines changed

src/request_body_processor/json_backend_jsoncons.cc

Lines changed: 195 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,53 @@ class RawJsonTokenCursor {
237237
return false;
238238
}
239239

240+
bool consumeNextNumberToken(std::string_view *raw_token,
241+
std::string *detail) {
242+
std::size_t probe_offset = m_offset;
243+
if (!skipToNextNumberToken(&probe_offset, detail)) {
244+
return false;
245+
}
246+
if (!consumeNumberAt(&probe_offset, raw_token, detail)) {
247+
return false;
248+
}
249+
m_offset = probe_offset;
250+
return true;
251+
}
252+
253+
bool advanceExactNumber(std::string_view exact_number, std::string *detail) {
254+
if (!isValidJsonNumber(exact_number)) {
255+
if (detail != nullptr) {
256+
*detail = "Unable to advance raw JSON number cursor using a non-numeric token.";
257+
}
258+
return false;
259+
}
260+
261+
std::size_t probe_offset = m_offset;
262+
if (!skipToNextNumberToken(&probe_offset, detail)) {
263+
return false;
264+
}
265+
if (probe_offset + exact_number.size() > m_input.size()
266+
|| m_input.compare(probe_offset, exact_number.size(), exact_number)
267+
!= 0) {
268+
if (detail != nullptr) {
269+
*detail = "Exact raw JSON number token did not match jsoncons numeric lexeme.";
270+
}
271+
return false;
272+
}
273+
274+
const std::size_t next_offset = probe_offset + exact_number.size();
275+
if (next_offset < m_input.size()
276+
&& !isNumberBoundary(m_input[next_offset])) {
277+
if (detail != nullptr) {
278+
*detail = "Exact raw JSON number token was followed by additional numeric characters.";
279+
}
280+
return false;
281+
}
282+
283+
m_offset = next_offset;
284+
return true;
285+
}
286+
240287
private:
241288
static bool isWhitespace(char value) {
242289
return std::isspace(static_cast<unsigned char>(value)) != 0;
@@ -246,11 +293,19 @@ class RawJsonTokenCursor {
246293
return std::isxdigit(static_cast<unsigned char>(value)) != 0;
247294
}
248295

296+
static bool isNumberBoundary(char value) {
297+
return isWhitespace(value) || value == ',' || value == ']' || value == '}';
298+
}
299+
249300
void skipInsignificant() {
250-
while (m_offset < m_input.size()) {
251-
char current = m_input[m_offset];
301+
skipInsignificantAt(&m_offset);
302+
}
303+
304+
void skipInsignificantAt(std::size_t *offset) const {
305+
while (*offset < m_input.size()) {
306+
char current = m_input[*offset];
252307
if (isWhitespace(current) || current == ',' || current == ':') {
253-
m_offset++;
308+
(*offset)++;
254309
continue;
255310
}
256311
break;
@@ -259,76 +314,91 @@ class RawJsonTokenCursor {
259314

260315
bool consumeChar(char expected, std::string_view *raw_token,
261316
std::string *detail) {
262-
if (m_offset >= m_input.size() || m_input[m_offset] != expected) {
317+
return consumeCharAt(&m_offset, expected, raw_token, detail);
318+
}
319+
320+
bool consumeCharAt(std::size_t *offset, char expected,
321+
std::string_view *raw_token, std::string *detail) const {
322+
if (*offset >= m_input.size() || m_input[*offset] != expected) {
263323
if (detail != nullptr) {
264324
*detail = std::string("Expected raw JSON token '") + expected
265325
+ "' while synchronizing jsoncons events.";
266326
}
267327
return false;
268328
}
269329

270-
*raw_token = std::string_view(m_input.data() + m_offset, 1);
271-
m_offset++;
330+
*raw_token = std::string_view(m_input.data() + *offset, 1);
331+
(*offset)++;
272332
return true;
273333
}
274334

275335
bool consumeLiteral(const char *literal, std::string_view *raw_token,
276336
std::string *detail) {
337+
return consumeLiteralAt(&m_offset, literal, raw_token, detail);
338+
}
339+
340+
bool consumeLiteralAt(std::size_t *offset, const char *literal,
341+
std::string_view *raw_token, std::string *detail) const {
277342
const std::size_t length = std::char_traits<char>::length(literal);
278-
if (m_offset + length > m_input.size()
279-
|| m_input.compare(m_offset, length, literal) != 0) {
343+
if (*offset + length > m_input.size()
344+
|| m_input.compare(*offset, length, literal) != 0) {
280345
if (detail != nullptr) {
281346
*detail = std::string("Expected raw JSON literal '") + literal
282347
+ "' while synchronizing jsoncons events.";
283348
}
284349
return false;
285350
}
286351

287-
*raw_token = std::string_view(m_input.data() + m_offset, length);
288-
m_offset += length;
352+
*raw_token = std::string_view(m_input.data() + *offset, length);
353+
*offset += length;
289354
return true;
290355
}
291356

292357
bool consumeString(std::string_view *raw_token, std::string *detail) {
293-
const std::size_t start = m_offset;
358+
return consumeStringAt(&m_offset, raw_token, detail);
359+
}
360+
361+
bool consumeStringAt(std::size_t *offset, std::string_view *raw_token,
362+
std::string *detail) const {
363+
const std::size_t start = *offset;
294364

295-
if (m_offset >= m_input.size() || m_input[m_offset] != '"') {
365+
if (*offset >= m_input.size() || m_input[*offset] != '"') {
296366
if (detail != nullptr) {
297367
*detail = "Expected raw JSON string token while synchronizing jsoncons events.";
298368
}
299369
return false;
300370
}
301371

302-
m_offset++;
303-
while (m_offset < m_input.size()) {
304-
char current = m_input[m_offset++];
372+
(*offset)++;
373+
while (*offset < m_input.size()) {
374+
char current = m_input[(*offset)++];
305375
if (current == '\\') {
306-
if (m_offset >= m_input.size()) {
376+
if (*offset >= m_input.size()) {
307377
if (detail != nullptr) {
308378
*detail = "Truncated escape sequence while synchronizing raw JSON string token.";
309379
}
310380
return false;
311381
}
312382

313-
char escaped = m_input[m_offset++];
383+
char escaped = m_input[(*offset)++];
314384
if (escaped == 'u') {
315385
for (int i = 0; i < 4; i++) {
316-
if (m_offset >= m_input.size()
317-
|| !isHexDigit(m_input[m_offset])) {
386+
if (*offset >= m_input.size()
387+
|| !isHexDigit(m_input[*offset])) {
318388
if (detail != nullptr) {
319389
*detail = "Invalid Unicode escape while synchronizing raw JSON string token.";
320390
}
321391
return false;
322392
}
323-
m_offset++;
393+
(*offset)++;
324394
}
325395
}
326396
continue;
327397
}
328398

329399
if (current == '"') {
330400
*raw_token = std::string_view(m_input.data() + start,
331-
m_offset - start);
401+
*offset - start);
332402
return true;
333403
}
334404

@@ -347,68 +417,125 @@ class RawJsonTokenCursor {
347417
}
348418

349419
bool consumeNumber(std::string_view *raw_token, std::string *detail) {
350-
const std::size_t start = m_offset;
420+
return consumeNumberAt(&m_offset, raw_token, detail);
421+
}
351422

352-
if (m_offset < m_input.size() && m_input[m_offset] == '-') {
353-
m_offset++;
423+
bool consumeNumberAt(std::size_t *offset, std::string_view *raw_token,
424+
std::string *detail) const {
425+
const std::size_t start = *offset;
426+
427+
if (*offset < m_input.size() && m_input[*offset] == '-') {
428+
(*offset)++;
354429
}
355430

356-
if (m_offset >= m_input.size()) {
431+
if (*offset >= m_input.size()) {
357432
if (detail != nullptr) {
358433
*detail = "Unexpected end of input while synchronizing raw JSON number token.";
359434
}
360435
return false;
361436
}
362437

363-
if (m_input[m_offset] == '0') {
364-
m_offset++;
438+
if (m_input[*offset] == '0') {
439+
(*offset)++;
365440
} else {
366-
if (!isDigit(m_input[m_offset]) || m_input[m_offset] == '0') {
441+
if (!isDigit(m_input[*offset]) || m_input[*offset] == '0') {
367442
if (detail != nullptr) {
368443
*detail = "Invalid integer component while synchronizing raw JSON number token.";
369444
}
370445
return false;
371446
}
372-
while (m_offset < m_input.size() && isDigit(m_input[m_offset])) {
373-
m_offset++;
447+
while (*offset < m_input.size() && isDigit(m_input[*offset])) {
448+
(*offset)++;
374449
}
375450
}
376451

377-
if (m_offset < m_input.size() && m_input[m_offset] == '.') {
378-
m_offset++;
379-
if (m_offset >= m_input.size() || !isDigit(m_input[m_offset])) {
452+
if (*offset < m_input.size() && m_input[*offset] == '.') {
453+
(*offset)++;
454+
if (*offset >= m_input.size() || !isDigit(m_input[*offset])) {
380455
if (detail != nullptr) {
381456
*detail = "Invalid fraction component while synchronizing raw JSON number token.";
382457
}
383458
return false;
384459
}
385-
while (m_offset < m_input.size() && isDigit(m_input[m_offset])) {
386-
m_offset++;
460+
while (*offset < m_input.size() && isDigit(m_input[*offset])) {
461+
(*offset)++;
387462
}
388463
}
389464

390-
if (m_offset < m_input.size()
391-
&& (m_input[m_offset] == 'e' || m_input[m_offset] == 'E')) {
392-
m_offset++;
393-
if (m_offset < m_input.size()
394-
&& (m_input[m_offset] == '+' || m_input[m_offset] == '-')) {
395-
m_offset++;
465+
if (*offset < m_input.size()
466+
&& (m_input[*offset] == 'e' || m_input[*offset] == 'E')) {
467+
(*offset)++;
468+
if (*offset < m_input.size()
469+
&& (m_input[*offset] == '+' || m_input[*offset] == '-')) {
470+
(*offset)++;
396471
}
397-
if (m_offset >= m_input.size() || !isDigit(m_input[m_offset])) {
472+
if (*offset >= m_input.size() || !isDigit(m_input[*offset])) {
398473
if (detail != nullptr) {
399474
*detail = "Invalid exponent component while synchronizing raw JSON number token.";
400475
}
401476
return false;
402477
}
403-
while (m_offset < m_input.size() && isDigit(m_input[m_offset])) {
404-
m_offset++;
478+
while (*offset < m_input.size() && isDigit(m_input[*offset])) {
479+
(*offset)++;
405480
}
406481
}
407482

408-
*raw_token = std::string_view(m_input.data() + start, m_offset - start);
483+
*raw_token = std::string_view(m_input.data() + start, *offset - start);
409484
return true;
410485
}
411486

487+
bool skipTokenAt(std::size_t *offset, std::string *detail) const {
488+
std::string_view ignored;
489+
if (*offset >= m_input.size()) {
490+
if (detail != nullptr) {
491+
*detail = "Unexpected end of input while searching for a raw JSON number token.";
492+
}
493+
return false;
494+
}
495+
496+
switch (m_input[*offset]) {
497+
case '{':
498+
return consumeCharAt(offset, '{', &ignored, detail);
499+
case '}':
500+
return consumeCharAt(offset, '}', &ignored, detail);
501+
case '[':
502+
return consumeCharAt(offset, '[', &ignored, detail);
503+
case ']':
504+
return consumeCharAt(offset, ']', &ignored, detail);
505+
case '"':
506+
return consumeStringAt(offset, &ignored, detail);
507+
case 't':
508+
return consumeLiteralAt(offset, "true", &ignored, detail);
509+
case 'f':
510+
return consumeLiteralAt(offset, "false", &ignored, detail);
511+
case 'n':
512+
return consumeLiteralAt(offset, "null", &ignored, detail);
513+
default:
514+
if (detail != nullptr) {
515+
*detail = "Unable to locate the next raw JSON number token while synchronizing jsoncons events.";
516+
}
517+
return false;
518+
}
519+
}
520+
521+
bool skipToNextNumberToken(std::size_t *offset, std::string *detail) const {
522+
while (true) {
523+
skipInsignificantAt(offset);
524+
if (*offset >= m_input.size()) {
525+
if (detail != nullptr) {
526+
*detail = "Unexpected end of input while searching for a raw JSON number token.";
527+
}
528+
return false;
529+
}
530+
if (m_input[*offset] == '-' || isDigit(m_input[*offset])) {
531+
return true;
532+
}
533+
if (!skipTokenAt(offset, detail)) {
534+
return false;
535+
}
536+
}
537+
}
538+
412539
const std::string &m_input;
413540
std::size_t m_offset{0};
414541
};
@@ -453,12 +580,6 @@ JsonParseResult emitEvent(const std::string &input, JsonEventSink *sink,
453580
std::string_view raw_token;
454581
std::string sync_detail;
455582

456-
if (!token_cursor->consume(event, &raw_token, &sync_detail)) {
457-
return makeResult(JsonParseStatus::InternalError,
458-
JsonSinkStatus::Continue, sync_detail);
459-
}
460-
recordJsonconsTokenSyncStep();
461-
462583
switch (event.event_type()) {
463584
case jsoncons::staj_event_type::begin_object:
464585
sink_status = sink->on_start_object();
@@ -502,6 +623,23 @@ JsonParseResult emitEvent(const std::string &input, JsonEventSink *sink,
502623
return fromJsonconsError(error, context);
503624
}
504625
if (isNumericStringEvent(event)) {
626+
const std::string_view decoded_number(decoded.data(), decoded.size());
627+
if (isValidJsonNumber(decoded_number)
628+
&& token_cursor->advanceExactNumber(decoded_number,
629+
&sync_detail)) {
630+
recordJsonconsTokenExactAdvanceStep();
631+
sink_status = sink->on_number(decoded_number);
632+
if (sink_status != JsonSinkStatus::Continue) {
633+
return stopTraversal(sink_status, "handling a number");
634+
}
635+
return makeResult(JsonParseStatus::Ok);
636+
}
637+
if (!token_cursor->consumeNextNumberToken(&raw_token,
638+
&sync_detail)) {
639+
return makeResult(JsonParseStatus::InternalError,
640+
JsonSinkStatus::Continue, sync_detail);
641+
}
642+
recordJsonconsTokenSyncStep();
505643
std::string_view raw_number = rawNumberFromContext(input,
506644
jsoncons::staj_event_type::double_value, context, event,
507645
raw_token);
@@ -545,6 +683,12 @@ JsonParseResult emitEvent(const std::string &input, JsonEventSink *sink,
545683
case jsoncons::staj_event_type::uint64_value:
546684
case jsoncons::staj_event_type::double_value:
547685
case jsoncons::staj_event_type::half_value: {
686+
if (!token_cursor->consumeNextNumberToken(&raw_token,
687+
&sync_detail)) {
688+
return makeResult(JsonParseStatus::InternalError,
689+
JsonSinkStatus::Continue, sync_detail);
690+
}
691+
recordJsonconsTokenSyncStep();
548692
std::string_view raw_number = rawNumberFromContext(input,
549693
event.event_type(), context, event, raw_token);
550694
if (raw_number.empty()) {

src/request_body_processor/json_instrumentation.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,5 +114,11 @@ void recordJsonconsTokenSyncStep() noexcept {
114114
#endif
115115
}
116116

117+
// Increments the jsoncons_token_exact_advance_steps counter in g_metrics.
// The body is compiled out unless MSC_JSON_AUDIT_INSTRUMENTATION is defined.
void recordJsonconsTokenExactAdvanceStep() noexcept {
#if defined(MSC_JSON_AUDIT_INSTRUMENTATION)
    ++g_metrics.jsoncons_token_exact_advance_steps;
#endif
}
122+
117123
} // namespace RequestBodyProcessor
118124
} // namespace modsecurity

0 commit comments

Comments
 (0)