2020#include " src/request_body_processor/json_backend.h"
2121
2222#include < algorithm>
23- #include < cassert>
2423#include < chrono>
2524#include < cstdint>
2625#include < memory>
@@ -90,13 +89,33 @@ JsonParseResult fromSimdjsonError(simdjson::error_code error) {
9089 }
9190}
9291
92+ std::size_t effectiveTechnicalMaxDepth (
93+ const JsonBackendParseOptions &options) {
94+ return options.technical_max_depth > 0
95+ ? static_cast <std::size_t >(options.technical_max_depth ) : 1 ;
96+ }
97+
98+ std::string_view trimTrailingJsonWhitespace (std::string_view token) {
99+ while (!token.empty ()) {
100+ const char tail = token.back ();
101+ if (tail != ' ' && tail != ' \t ' && tail != ' \n ' && tail != ' \r ' ) {
102+ break ;
103+ }
104+ token.remove_suffix (1 );
105+ }
106+ return token;
107+ }
108+
93109/*
94110 * The ondemand parser is reused per thread because simdjson benefits from
95111 * keeping its internal buffers warm across parses. thread_local storage keeps
96112 * the parser isolated to the calling thread, so no parser state is shared
97113 * across transactions running on different threads. The parse and full
98114 * document traversal both complete inside parseDocumentWithSimdjson(), so no
99- * parser-backed state escapes this function.
115+ * parser-backed state escapes this function. We intentionally do not add an
116+ * automatic release/recreate heuristic here: the vendored simdjson API
117+ * explicitly supports parser reuse, and retained capacity after unusually
118+ * large inputs remains a conscious tradeoff rather than an accidental leak.
100119 */
101120simdjson::ondemand::parser &getReusableSimdjsonParser () {
102121 thread_local std::unique_ptr<simdjson::ondemand::parser> parser;
@@ -116,8 +135,7 @@ simdjson::ondemand::parser &getReusableSimdjsonParser() {
116135
117136std::size_t clampRequestedMaxDepth (std::size_t input_size,
118137 const JsonBackendParseOptions &options) {
119- const std::size_t requested_depth = options.technical_max_depth > 0
120- ? static_cast <std::size_t >(options.technical_max_depth ) : 1 ;
138+ const std::size_t requested_depth = effectiveTechnicalMaxDepth (options);
121139 const std::size_t max_possible_depth = (input_size / 2 ) + 1 ;
122140 return std::min (requested_depth, std::max<std::size_t >(1 ,
123141 max_possible_depth));
@@ -140,6 +158,13 @@ simdjson::error_code prepareParser(simdjson::ondemand::parser *parser,
140158 return simdjson::SUCCESS;
141159 }
142160
161+ // simdjson reuses parser buffers across parses. allocate() can grow the
162+ // per-thread parser to satisfy a larger document or different max-depth,
163+ // but it does not proactively shrink retained capacity for later, smaller
164+ // inputs. In simdjson 4.6.1 the max-depth parameter is only enforced by
165+ // simdjson's development checks, so we keep passing it here for that
166+ // internal guardrail while our own walker enforces technical_max_depth at
167+ // runtime using current_depth().
143168 return parser->allocate (input_size, required_max_depth);
144169}
145170
@@ -154,7 +179,10 @@ JsonParseResult getResult(ResultType &&result, TargetType *target) {
154179
155180class JsonBackendWalker {
156181 public:
157- explicit JsonBackendWalker (JsonEventSink *sink) : m_sink(sink) { }
182+ JsonBackendWalker (JsonEventSink *sink,
183+ const JsonBackendParseOptions &options)
184+ : m_sink(sink),
185+ m_technical_max_depth (effectiveTechnicalMaxDepth(options)) { }
158186
159187 JsonParseResult walk (simdjson::ondemand::document *document) {
160188 bool is_scalar = false ;
@@ -205,7 +233,8 @@ class JsonBackendWalker {
205233 return result;
206234 }
207235
208- JsonSinkStatus sink_status = m_sink->on_number (raw_number);
236+ JsonSinkStatus sink_status = m_sink->on_number (
237+ trimTrailingJsonWhitespace (raw_number));
209238 if (sink_status != JsonSinkStatus::Continue) {
210239 return stopTraversal (sink_status, " handling a root number" );
211240 }
@@ -241,11 +270,13 @@ class JsonBackendWalker {
241270 }
242271 return makeResult (JsonParseStatus::Ok);
243272 }
273+ case simdjson::ondemand::json_type::unknown:
274+ return makeResult (JsonParseStatus::ParseError,
275+ " Invalid JSON token encountered in simdjson backend." );
244276 case simdjson::ondemand::json_type::object:
245277 case simdjson::ondemand::json_type::array:
246- case simdjson::ondemand::json_type::unknown:
247278 return makeResult (JsonParseStatus::InternalError,
248- " Unexpected root scalar type encountered in simdjson backend." );
279+ " Unexpected root scalar container encountered in simdjson backend." );
249280 }
250281
251282 return makeResult (JsonParseStatus::InternalError,
@@ -262,8 +293,14 @@ class JsonBackendWalker {
262293
263294 switch (type) {
264295 case simdjson::ondemand::json_type::object:
296+ if (auto result = enforceTechnicalDepth (value); !result.ok ()) {
297+ return result;
298+ }
265299 return walkObject (value);
266300 case simdjson::ondemand::json_type::array:
301+ if (auto result = enforceTechnicalDepth (value); !result.ok ()) {
302+ return result;
303+ }
267304 return walkArray (value);
268305 case simdjson::ondemand::json_type::string:
269306 return walkString (value);
@@ -279,8 +316,8 @@ class JsonBackendWalker {
279316 return makeResult (JsonParseStatus::Ok);
280317 }
281318 case simdjson::ondemand::json_type::unknown:
282- return makeResult (JsonParseStatus::InternalError ,
283- " Unknown JSON token type encountered." );
319+ return makeResult (JsonParseStatus::ParseError ,
320+ " Invalid JSON token encountered in simdjson backend ." );
284321 }
285322
286323 return makeResult (JsonParseStatus::InternalError,
@@ -385,7 +422,8 @@ class JsonBackendWalker {
385422 }
386423
387424 JsonParseResult walkNumber (simdjson::ondemand::value value) {
388- std::string_view raw_number = value.raw_json_token ();
425+ std::string_view raw_number = trimTrailingJsonWhitespace (
426+ value.raw_json_token ());
389427 JsonSinkStatus sink_status = m_sink->on_number (raw_number);
390428
391429 if (sink_status != JsonSinkStatus::Continue) {
@@ -410,52 +448,79 @@ class JsonBackendWalker {
410448 return makeResult (JsonParseStatus::Ok);
411449 }
412450
451+ JsonParseResult enforceTechnicalDepth (simdjson::ondemand::value value) {
452+ const int32_t current_depth = value.current_depth ();
453+ if (current_depth <= 0 ) {
454+ return makeResult (JsonParseStatus::InternalError,
455+ " Invalid current depth reported by simdjson backend." );
456+ }
457+
458+ if (static_cast <std::size_t >(current_depth) > m_technical_max_depth) {
459+ return makeResult (JsonParseStatus::ParseError,
460+ " JSON nesting depth exceeds backend technical max depth." );
461+ }
462+
463+ return makeResult (JsonParseStatus::Ok);
464+ }
465+
413466 JsonEventSink *m_sink;
467+ std::size_t m_technical_max_depth;
414468};
415469
416- } // namespace
470+ struct PreparedSimdjsonInput {
471+ simdjson::padded_string_view view{};
472+ simdjson::padded_string owned_copy{};
473+ };
417474
418- JsonParseResult parseDocumentWithSimdjson (const std::string &input,
419- JsonEventSink *sink, const JsonBackendParseOptions &options) {
420- if (sink == nullptr ) {
421- return makeResult (JsonParseStatus::InternalError,
422- JsonSinkStatus::InternalError, " JSON event sink is null." );
423- }
475+ PreparedSimdjsonInput prepareMutableSimdjsonInput (std::string *input) {
476+ PreparedSimdjsonInput prepared;
424477
425- const char *const input_data = input.data ();
426- const std::size_t input_size = input.size ();
478+ // The production request-body path owns a mutable std::string, so we can
479+ // pad that buffer in place and keep the logical JSON length in the
480+ // returned padded_string_view. This removes the extra padded_string copy
481+ // while still satisfying simdjson's padding requirement explicitly.
482+ prepared.view = simdjson::pad (*input);
483+ return prepared;
484+ }
427485
428- simdjson::ondemand::parser &parser = getReusableSimdjsonParser ();
429- // This only prepares parser capacity and max-depth bookkeeping. It does
430- // not make the caller-provided string safe for zero-copy parsing.
431- if (auto error = prepareParser (&parser, input_size, options); error) {
432- return fromSimdjsonError (error);
486+ PreparedSimdjsonInput prepareConstSimdjsonInput (const std::string &input) {
487+ PreparedSimdjsonInput prepared;
488+ prepared.view = simdjson::padded_string_view (input);
489+
490+ // The const path must not guess about std::string capacity. We only parse
491+ // directly when simdjson itself confirms that the existing allocation
492+ // and/or trailing whitespace provide sufficient padding.
493+ if (prepared.view .has_sufficient_padding ()) {
494+ return prepared;
433495 }
434496
435- // TODO: Revisit zero-copy only when the caller can guarantee a stable
436- // buffer whose allocation is at least len + SIMDJSON_PADDING bytes.
437- //
438- // We intentionally keep the padded_string copy here. The current input is
439- // a const std::string built from the request-body snapshot/append path, so
440- // it does not provide guaranteed padding for simdjson's direct iterate()
441- // overloads. In practice large request bodies often end up with
442- // size() == capacity(), making any direct path allocator- and stdlib-
443- // dependent. padded_string keeps this backend deterministic until the
444- // caller can provide guaranteed lifetime and padding.
445497#ifdef MSC_JSON_AUDIT_INSTRUMENTATION
446498 const auto padded_start = std::chrono::steady_clock::now ();
447- simdjson::padded_string padded (input);
448- recordSimdjsonPaddedCopy (input_size , static_cast <std::uint64_t >(
499+ prepared. owned_copy = simdjson::padded_string (input);
500+ recordSimdjsonPaddedCopy (input. size () , static_cast <std::uint64_t >(
449501 std::chrono::duration_cast<std::chrono::nanoseconds>(
450502 std::chrono::steady_clock::now () - padded_start).count ()));
451503#else
452- simdjson::padded_string padded (input);
504+ prepared. owned_copy = simdjson::padded_string (input);
453505#endif
454- simdjson::ondemand::document document;
506+ prepared.view = prepared.owned_copy ;
507+ return prepared;
508+ }
509+
510+ JsonParseResult parsePreparedDocumentWithSimdjson (
511+ simdjson::padded_string_view input, JsonEventSink *sink,
512+ const JsonBackendParseOptions &options) {
513+ simdjson::ondemand::parser &parser = getReusableSimdjsonParser ();
514+ // This only prepares parser capacity and max-depth bookkeeping. Buffer
515+ // lifetime and padding must already have been handled by the caller.
516+ if (auto error = prepareParser (&parser, input.length (), options); error) {
517+ return fromSimdjsonError (error);
518+ }
455519
520+ simdjson::ondemand::document document;
456521#ifdef MSC_JSON_AUDIT_INSTRUMENTATION
457522 const auto iterate_start = std::chrono::steady_clock::now ();
458- if (auto error = parser.iterate(padded ).get (document); error) {
523+ if (auto error = parser.iterate(input ).get (document); error) {
459524 recordSimdjsonIterate (static_cast <std::uint64_t >(
460525 std::chrono::duration_cast<std::chrono::nanoseconds>(
461526 std::chrono::steady_clock::now () - iterate_start).count ()));
@@ -465,18 +530,37 @@ JsonParseResult parseDocumentWithSimdjson(const std::string &input,
465530 std::chrono::duration_cast<std::chrono::nanoseconds>(
466531 std::chrono::steady_clock::now () - iterate_start).count ()));
467532#else
468- if (auto error = parser.iterate(padded ).get (document); error) {
533+ if (auto error = parser.iterate(input ).get (document); error) {
469534 return fromSimdjsonError (error);
470535 }
471536#endif
472537
473- JsonBackendWalker walker (sink);
474- assert (input.data () == input_data);
475- assert (input.size () == input_size);
476- JsonParseResult walk_result = walker.walk (&document);
477- assert (input.data () == input_data);
478- assert (input.size () == input_size);
479- return walk_result;
538+ JsonBackendWalker walker (sink, options);
539+ return walker.walk (&document);
540+ }
541+
542+ } // namespace
543+
544+ JsonParseResult parseDocumentWithSimdjson (std::string &input,
545+ JsonEventSink *sink, const JsonBackendParseOptions &options) {
546+ if (sink == nullptr ) {
547+ return makeResult (JsonParseStatus::InternalError,
548+ JsonSinkStatus::InternalError, " JSON event sink is null." );
549+ }
550+
551+ PreparedSimdjsonInput prepared = prepareMutableSimdjsonInput (&input);
552+ return parsePreparedDocumentWithSimdjson (prepared.view , sink, options);
553+ }
554+
555+ JsonParseResult parseDocumentWithSimdjson (const std::string &input,
556+ JsonEventSink *sink, const JsonBackendParseOptions &options) {
557+ if (sink == nullptr ) {
558+ return makeResult (JsonParseStatus::InternalError,
559+ JsonSinkStatus::InternalError, " JSON event sink is null." );
560+ }
561+
562+ PreparedSimdjsonInput prepared = prepareConstSimdjsonInput (input);
563+ return parsePreparedDocumentWithSimdjson (prepared.view , sink, options);
480564}
481565
482566} // namespace RequestBodyProcessor
0 commit comments