Skip to content

Commit ffdc044

Browse files
committed
performance adjustment
1 parent f349140 commit ffdc044

File tree

5 files changed

+331
-48
lines changed

5 files changed

+331
-48
lines changed

src/request_body_processor/json_adapter.cc

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,28 @@ JsonParseResult normalizeResult(JsonParseResult result) {
5656

5757
} // namespace
5858

59+
JsonParseResult JSONAdapter::parse(std::string &input,
60+
JsonEventSink *sink, const JsonBackendParseOptions &options) const {
61+
if (sink == nullptr) {
62+
return makeResult(JsonParseStatus::InternalError,
63+
JsonSinkStatus::InternalError, "JSON event sink is null.");
64+
}
65+
66+
if (input.empty()) {
67+
return makeResult(JsonParseStatus::Ok);
68+
}
69+
70+
#if defined(MSC_JSON_BACKEND_SIMDJSON)
71+
return normalizeResult(parseDocumentWithSimdjson(input, sink, options));
72+
#elif defined(MSC_JSON_BACKEND_JSONCONS)
73+
return normalizeResult(parseDocumentWithJsoncons(input, sink, options));
74+
#else
75+
return makeResult(JsonParseStatus::InternalError,
76+
JsonSinkStatus::InternalError,
77+
"ModSecurity was built without a selected JSON backend.");
78+
#endif
79+
}
80+
5981
JsonParseResult JSONAdapter::parse(const std::string &input,
6082
JsonEventSink *sink, const JsonBackendParseOptions &options) const {
6183
if (sink == nullptr) {

src/request_body_processor/json_adapter.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ namespace RequestBodyProcessor {
2525

2626
class JSONAdapter {
2727
public:
28+
JsonParseResult parse(std::string &input, JsonEventSink *sink,
29+
const JsonBackendParseOptions &options = JsonBackendParseOptions()) const;
30+
2831
JsonParseResult parse(const std::string &input, JsonEventSink *sink,
2932
const JsonBackendParseOptions &options = JsonBackendParseOptions()) const;
3033
};

src/request_body_processor/json_backend.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,9 @@ class JsonEventSink {
7171
JsonParseResult parseDocumentWithSimdjson(const std::string &input,
7272
JsonEventSink *sink, const JsonBackendParseOptions &options);
7373

74+
JsonParseResult parseDocumentWithSimdjson(std::string &input,
75+
JsonEventSink *sink, const JsonBackendParseOptions &options);
76+
7477
JsonParseResult parseDocumentWithJsoncons(const std::string &input,
7578
JsonEventSink *sink, const JsonBackendParseOptions &options);
7679

src/request_body_processor/json_backend_simdjson.cc

Lines changed: 132 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
#include "src/request_body_processor/json_backend.h"
2121

2222
#include <algorithm>
23-
#include <cassert>
2423
#include <chrono>
2524
#include <cstdint>
2625
#include <memory>
@@ -90,13 +89,33 @@ JsonParseResult fromSimdjsonError(simdjson::error_code error) {
9089
}
9190
}
9291

92+
std::size_t effectiveTechnicalMaxDepth(
93+
const JsonBackendParseOptions &options) {
94+
return options.technical_max_depth > 0
95+
? static_cast<std::size_t>(options.technical_max_depth) : 1;
96+
}
97+
98+
std::string_view trimTrailingJsonWhitespace(std::string_view token) {
99+
while (!token.empty()) {
100+
const char tail = token.back();
101+
if (tail != ' ' && tail != '\t' && tail != '\n' && tail != '\r') {
102+
break;
103+
}
104+
token.remove_suffix(1);
105+
}
106+
return token;
107+
}
108+
93109
/*
94110
* The ondemand parser is reused per thread because simdjson benefits from
95111
* keeping its internal buffers warm across parses. thread_local storage keeps
96112
* the parser isolated to the calling thread, so no parser state is shared
97113
* across transactions running on different threads. The parse and full
98114
* document traversal both complete inside parseDocumentWithSimdjson(), so no
99-
* parser-backed state escapes this function.
115+
* parser-backed state escapes this function. We intentionally do not add an
116+
* automatic release/recreate heuristic here: the vendored simdjson API
117+
* explicitly supports parser reuse, and retained capacity after unusually
118+
* large inputs remains a conscious tradeoff rather than an accidental leak.
100119
*/
101120
simdjson::ondemand::parser &getReusableSimdjsonParser() {
102121
thread_local std::unique_ptr<simdjson::ondemand::parser> parser;
@@ -116,8 +135,7 @@ simdjson::ondemand::parser &getReusableSimdjsonParser() {
116135

117136
std::size_t clampRequestedMaxDepth(std::size_t input_size,
118137
const JsonBackendParseOptions &options) {
119-
const std::size_t requested_depth = options.technical_max_depth > 0
120-
? static_cast<std::size_t>(options.technical_max_depth) : 1;
138+
const std::size_t requested_depth = effectiveTechnicalMaxDepth(options);
121139
const std::size_t max_possible_depth = (input_size / 2) + 1;
122140
return std::min(requested_depth, std::max<std::size_t>(1,
123141
max_possible_depth));
@@ -140,6 +158,13 @@ simdjson::error_code prepareParser(simdjson::ondemand::parser *parser,
140158
return simdjson::SUCCESS;
141159
}
142160

161+
// simdjson reuses parser buffers across parses. allocate() can grow the
162+
// per-thread parser to satisfy a larger document or different max-depth,
163+
// but it does not proactively shrink retained capacity for later, smaller
164+
// inputs. In simdjson 4.6.1 the max-depth parameter is only enforced by
165+
// simdjson's development checks, so we keep passing it here for that
166+
// internal guardrail while our own walker enforces technical_max_depth at
167+
// runtime using current_depth().
143168
return parser->allocate(input_size, required_max_depth);
144169
}
145170

@@ -154,7 +179,10 @@ JsonParseResult getResult(ResultType &&result, TargetType *target) {
154179

155180
class JsonBackendWalker {
156181
public:
157-
explicit JsonBackendWalker(JsonEventSink *sink) : m_sink(sink) { }
182+
JsonBackendWalker(JsonEventSink *sink,
183+
const JsonBackendParseOptions &options)
184+
: m_sink(sink),
185+
m_technical_max_depth(effectiveTechnicalMaxDepth(options)) { }
158186

159187
JsonParseResult walk(simdjson::ondemand::document *document) {
160188
bool is_scalar = false;
@@ -205,7 +233,8 @@ class JsonBackendWalker {
205233
return result;
206234
}
207235

208-
JsonSinkStatus sink_status = m_sink->on_number(raw_number);
236+
JsonSinkStatus sink_status = m_sink->on_number(
237+
trimTrailingJsonWhitespace(raw_number));
209238
if (sink_status != JsonSinkStatus::Continue) {
210239
return stopTraversal(sink_status, "handling a root number");
211240
}
@@ -241,11 +270,13 @@ class JsonBackendWalker {
241270
}
242271
return makeResult(JsonParseStatus::Ok);
243272
}
273+
case simdjson::ondemand::json_type::unknown:
274+
return makeResult(JsonParseStatus::ParseError,
275+
"Invalid JSON token encountered in simdjson backend.");
244276
case simdjson::ondemand::json_type::object:
245277
case simdjson::ondemand::json_type::array:
246-
case simdjson::ondemand::json_type::unknown:
247278
return makeResult(JsonParseStatus::InternalError,
248-
"Unexpected root scalar type encountered in simdjson backend.");
279+
"Unexpected root scalar container encountered in simdjson backend.");
249280
}
250281

251282
return makeResult(JsonParseStatus::InternalError,
@@ -262,8 +293,14 @@ class JsonBackendWalker {
262293

263294
switch (type) {
264295
case simdjson::ondemand::json_type::object:
296+
if (auto result = enforceTechnicalDepth(value); !result.ok()) {
297+
return result;
298+
}
265299
return walkObject(value);
266300
case simdjson::ondemand::json_type::array:
301+
if (auto result = enforceTechnicalDepth(value); !result.ok()) {
302+
return result;
303+
}
267304
return walkArray(value);
268305
case simdjson::ondemand::json_type::string:
269306
return walkString(value);
@@ -279,8 +316,8 @@ class JsonBackendWalker {
279316
return makeResult(JsonParseStatus::Ok);
280317
}
281318
case simdjson::ondemand::json_type::unknown:
282-
return makeResult(JsonParseStatus::InternalError,
283-
"Unknown JSON token type encountered.");
319+
return makeResult(JsonParseStatus::ParseError,
320+
"Invalid JSON token encountered in simdjson backend.");
284321
}
285322

286323
return makeResult(JsonParseStatus::InternalError,
@@ -385,7 +422,8 @@ class JsonBackendWalker {
385422
}
386423

387424
JsonParseResult walkNumber(simdjson::ondemand::value value) {
388-
std::string_view raw_number = value.raw_json_token();
425+
std::string_view raw_number = trimTrailingJsonWhitespace(
426+
value.raw_json_token());
389427
JsonSinkStatus sink_status = m_sink->on_number(raw_number);
390428

391429
if (sink_status != JsonSinkStatus::Continue) {
@@ -410,52 +448,79 @@ class JsonBackendWalker {
410448
return makeResult(JsonParseStatus::Ok);
411449
}
412450

451+
JsonParseResult enforceTechnicalDepth(simdjson::ondemand::value value) {
452+
const int32_t current_depth = value.current_depth();
453+
if (current_depth <= 0) {
454+
return makeResult(JsonParseStatus::InternalError,
455+
"Invalid current depth reported by simdjson backend.");
456+
}
457+
458+
if (static_cast<std::size_t>(current_depth) > m_technical_max_depth) {
459+
return makeResult(JsonParseStatus::ParseError,
460+
"JSON nesting depth exceeds backend technical max depth.");
461+
}
462+
463+
return makeResult(JsonParseStatus::Ok);
464+
}
465+
413466
JsonEventSink *m_sink;
467+
std::size_t m_technical_max_depth;
414468
};
415469

416-
} // namespace
470+
struct PreparedSimdjsonInput {
471+
simdjson::padded_string_view view{};
472+
simdjson::padded_string owned_copy{};
473+
};
417474

418-
JsonParseResult parseDocumentWithSimdjson(const std::string &input,
419-
JsonEventSink *sink, const JsonBackendParseOptions &options) {
420-
if (sink == nullptr) {
421-
return makeResult(JsonParseStatus::InternalError,
422-
JsonSinkStatus::InternalError, "JSON event sink is null.");
423-
}
475+
PreparedSimdjsonInput prepareMutableSimdjsonInput(std::string *input) {
476+
PreparedSimdjsonInput prepared;
424477

425-
const char *const input_data = input.data();
426-
const std::size_t input_size = input.size();
478+
// The production request-body path owns a mutable std::string, so we can
479+
// pad that buffer in place and keep the logical JSON length in the
480+
// returned padded_string_view. This removes the extra padded_string copy
481+
// while still satisfying simdjson's padding requirement explicitly.
482+
prepared.view = simdjson::pad(*input);
483+
return prepared;
484+
}
427485

428-
simdjson::ondemand::parser &parser = getReusableSimdjsonParser();
429-
// This only prepares parser capacity and max-depth bookkeeping. It does
430-
// not make the caller-provided string safe for zero-copy parsing.
431-
if (auto error = prepareParser(&parser, input_size, options); error) {
432-
return fromSimdjsonError(error);
486+
PreparedSimdjsonInput prepareConstSimdjsonInput(const std::string &input) {
487+
PreparedSimdjsonInput prepared;
488+
prepared.view = simdjson::padded_string_view(input);
489+
490+
// The const path must not guess about std::string capacity. We only parse
491+
// directly when simdjson itself confirms that the existing allocation
492+
// and/or trailing whitespace provide sufficient padding.
493+
if (prepared.view.has_sufficient_padding()) {
494+
return prepared;
433495
}
434496

435-
// TODO: Revisit zero-copy only when the caller can guarantee a stable
436-
// buffer whose allocation is at least len + SIMDJSON_PADDING bytes.
437-
//
438-
// We intentionally keep the padded_string copy here. The current input is
439-
// a const std::string built from the request-body snapshot/append path, so
440-
// it does not provide guaranteed padding for simdjson's direct iterate()
441-
// overloads. In practice large request bodies often end up with
442-
// size() == capacity(), making any direct path allocator- and stdlib-
443-
// dependent. padded_string keeps this backend deterministic until the
444-
// caller can provide guaranteed lifetime and padding.
445497
#ifdef MSC_JSON_AUDIT_INSTRUMENTATION
446498
const auto padded_start = std::chrono::steady_clock::now();
447-
simdjson::padded_string padded(input);
448-
recordSimdjsonPaddedCopy(input_size, static_cast<std::uint64_t>(
499+
prepared.owned_copy = simdjson::padded_string(input);
500+
recordSimdjsonPaddedCopy(input.size(), static_cast<std::uint64_t>(
449501
std::chrono::duration_cast<std::chrono::nanoseconds>(
450502
std::chrono::steady_clock::now() - padded_start).count()));
451503
#else
452-
simdjson::padded_string padded(input);
504+
prepared.owned_copy = simdjson::padded_string(input);
453505
#endif
454-
simdjson::ondemand::document document;
506+
prepared.view = prepared.owned_copy;
507+
return prepared;
508+
}
509+
510+
JsonParseResult parsePreparedDocumentWithSimdjson(
511+
simdjson::padded_string_view input, JsonEventSink *sink,
512+
const JsonBackendParseOptions &options) {
513+
simdjson::ondemand::parser &parser = getReusableSimdjsonParser();
514+
// This only prepares parser capacity and max-depth bookkeeping. Buffer
515+
// lifetime and padding must already have been handled by the caller.
516+
if (auto error = prepareParser(&parser, input.length(), options); error) {
517+
return fromSimdjsonError(error);
518+
}
455519

520+
simdjson::ondemand::document document;
456521
#ifdef MSC_JSON_AUDIT_INSTRUMENTATION
457522
const auto iterate_start = std::chrono::steady_clock::now();
458-
if (auto error = parser.iterate(padded).get(document); error) {
523+
if (auto error = parser.iterate(input).get(document); error) {
459524
recordSimdjsonIterate(static_cast<std::uint64_t>(
460525
std::chrono::duration_cast<std::chrono::nanoseconds>(
461526
std::chrono::steady_clock::now() - iterate_start).count()));
@@ -465,18 +530,37 @@ JsonParseResult parseDocumentWithSimdjson(const std::string &input,
465530
std::chrono::duration_cast<std::chrono::nanoseconds>(
466531
std::chrono::steady_clock::now() - iterate_start).count()));
467532
#else
468-
if (auto error = parser.iterate(padded).get(document); error) {
533+
if (auto error = parser.iterate(input).get(document); error) {
469534
return fromSimdjsonError(error);
470535
}
471536
#endif
472537

473-
JsonBackendWalker walker(sink);
474-
assert(input.data() == input_data);
475-
assert(input.size() == input_size);
476-
JsonParseResult walk_result = walker.walk(&document);
477-
assert(input.data() == input_data);
478-
assert(input.size() == input_size);
479-
return walk_result;
538+
JsonBackendWalker walker(sink, options);
539+
return walker.walk(&document);
540+
}
541+
542+
} // namespace
543+
544+
JsonParseResult parseDocumentWithSimdjson(std::string &input,
545+
JsonEventSink *sink, const JsonBackendParseOptions &options) {
546+
if (sink == nullptr) {
547+
return makeResult(JsonParseStatus::InternalError,
548+
JsonSinkStatus::InternalError, "JSON event sink is null.");
549+
}
550+
551+
PreparedSimdjsonInput prepared = prepareMutableSimdjsonInput(&input);
552+
return parsePreparedDocumentWithSimdjson(prepared.view, sink, options);
553+
}
554+
555+
JsonParseResult parseDocumentWithSimdjson(const std::string &input,
556+
JsonEventSink *sink, const JsonBackendParseOptions &options) {
557+
if (sink == nullptr) {
558+
return makeResult(JsonParseStatus::InternalError,
559+
JsonSinkStatus::InternalError, "JSON event sink is null.");
560+
}
561+
562+
PreparedSimdjsonInput prepared = prepareConstSimdjsonInput(input);
563+
return parsePreparedDocumentWithSimdjson(prepared.view, sink, options);
480564
}
481565

482566
} // namespace RequestBodyProcessor

0 commit comments

Comments
 (0)