@@ -477,43 +477,122 @@ int32_t mndGetDnodeData(SMnode *pMnode, SArray *pDnodeInfo) {
477477 * Keep both the previous and current cached offsets so
478478 * that during a DST transition window dnodes reporting
479479 * either the old or the new offset are both accepted.
480- * Refresh at most once per 60 s.
480+ * Refresh at most once per 60 s; the previous offset
481+ * is only accepted for a short grace period (3 min)
482+ * to avoid permanently weakening the check.
481483 */
/*
 * Cached timezone state shared by all callers of mndCheckTimezoneOffset().
 * Fields are only written by the thread that holds the seqlock (odd tsTzSeq)
 * and are read through mndReadTzSnapshot().
 */
static int64_t tsCachedTzOffset = 0;     /* offset obtained by the last successful refresh */
static int64_t tsCachedTzOffsetPrev = 0; /* offset that was cached before the last change */
static int64_t tsCachedTzOffsetMs = 0;   /* ms timestamp of the last refresh; 0 = never refreshed */
static int64_t tsCachedPrevSetMs = 0;    /* ms timestamp at which tsCachedTzOffsetPrev was recorded */
static int8_t  tsCachedHasPrev = 0;      /* non-zero once tsCachedTzOffsetPrev holds a real value */
/*
 * Seqlock sequence: even = stable, odd = write in
 * progress. CAS from even to odd grants single-writer
 * access; storing the original even value + 2 publishes
 * the new snapshot.
 */
static int64_t tsTzSeq = 0;
#define TZ_CACHE_REFRESH_MS 60000  /* minimum interval between periodic refreshes */
#define TZ_PREV_GRACE_MS    180000 /* how long the previous offset is still accepted */

/* Consistent copy of the cached timezone state, filled by mndReadTzSnapshot(). */
typedef struct {
  int64_t offset;     /* copy of tsCachedTzOffset */
  int64_t offsetPrev; /* copy of tsCachedTzOffsetPrev */
  int64_t refreshMs;  /* copy of tsCachedTzOffsetMs */
  int64_t prevSetMs;  /* copy of tsCachedPrevSetMs */
  int8_t  hasPrev;    /* copy of tsCachedHasPrev */
} STzSnapshot;
487505
488- static bool mndCheckTimezoneOffset (int64_t dnodeOffset ) {
489- int64_t nowMs = taosGetTimestampMs ();
490-
491- if (tsCachedTzOffsetMs == 0 ||
492- nowMs - tsCachedTzOffsetMs >= TZ_CACHE_REFRESH_MS ) {
493- int32_t ret = taosGetLocalTimezoneOffset ();
494- if (ret == TSDB_CODE_TIME_ERROR ) {
495- mError ("failed to get local timezone offset since %s" ,
496- tstrerror (ret ));
497- terrno = ret ;
498- return false;
499- }
500- int64_t offset = (int64_t )ret ;
506+ /*
507+ * Read a consistent snapshot of the tz cache.
508+ * Spins while a writer is active (odd seq) or
509+ * if the snapshot was torn (seq changed).
510+ */
511+ static void mndReadTzSnapshot (STzSnapshot * s ) {
512+ int64_t seq ;
513+ do {
514+ seq = atomic_load_64 (& tsTzSeq );
515+ if (seq & 1 ) continue ;
516+ s -> offset = atomic_load_64 (& tsCachedTzOffset );
517+ s -> offsetPrev = atomic_load_64 (& tsCachedTzOffsetPrev );
518+ s -> refreshMs = atomic_load_64 (& tsCachedTzOffsetMs );
519+ s -> prevSetMs = atomic_load_64 (& tsCachedPrevSetMs );
520+ s -> hasPrev = atomic_load_8 (& tsCachedHasPrev );
521+ } while (atomic_load_64 (& tsTzSeq ) != seq );
522+ }
501523
502- if (tsCachedTzOffsetMs != 0 && offset != tsCachedTzOffset ) {
503- /* offset changed (DST edge) — keep the old one */
504- tsCachedTzOffsetPrev = tsCachedTzOffset ;
505- tsCachedHasPrev = true;
506- }
507- tsCachedTzOffset = offset ;
508- tsCachedTzOffsetMs = nowMs ;
524+ /*
525+ * Try to refresh the tz cache. Uses CAS on tsTzSeq
526+ * to ensure single-writer; if another thread is
527+ * already refreshing, this is a harmless no-op.
528+ */
529+ static void mndRefreshTzCache (int64_t nowMs ) {
530+ int64_t seq = atomic_load_64 (& tsTzSeq );
531+ if (seq & 1 ) return ;
532+ if (atomic_val_compare_exchange_64 (& tsTzSeq , seq , seq + 1 ) != seq ) {
533+ return ;
509534 }
510535
511- if (dnodeOffset == tsCachedTzOffset ) return true;
512- if (tsCachedHasPrev && dnodeOffset == tsCachedTzOffsetPrev ) return true;
536+ /* seq is now odd — we are the sole writer */
537+ int32_t code = TSDB_CODE_SUCCESS ;
538+ int64_t offset = (int64_t )taosGetLocalTimezoneOffset (& code );
539+ if (code != TSDB_CODE_SUCCESS ) {
540+ mError ("failed to get local timezone offset since %s" , tstrerror (code ));
541+ /* rollback: restore even seq */
542+ atomic_store_64 (& tsTzSeq , seq );
543+ return ;
544+ }
513545
546+ int64_t oldMs = atomic_load_64 (& tsCachedTzOffsetMs );
547+ int64_t oldOff = atomic_load_64 (& tsCachedTzOffset );
548+ if (oldMs != 0 && offset != oldOff ) {
549+ /* offset changed (DST edge) — keep old one */
550+ atomic_store_64 (& tsCachedTzOffsetPrev , oldOff );
551+ atomic_store_64 (& tsCachedPrevSetMs , nowMs );
552+ atomic_store_8 (& tsCachedHasPrev , 1 );
553+ }
554+ atomic_store_64 (& tsCachedTzOffset , offset );
555+ atomic_store_64 (& tsCachedTzOffsetMs , nowMs );
556+
557+ /* publish: even seq+2 => readers see new state */
558+ atomic_store_64 (& tsTzSeq , seq + 2 );
559+ }
560+
561+ static bool mndMatchTzSnapshot (const STzSnapshot * s , int64_t dnodeOff ,
562+ int64_t nowMs ) {
563+ if (dnodeOff == s -> offset ) return true;
564+ if (s -> hasPrev && nowMs - s -> prevSetMs < TZ_PREV_GRACE_MS &&
565+ dnodeOff == s -> offsetPrev ) {
566+ return true;
567+ }
514568 return false;
515569}
516570
571+ static bool mndCheckTimezoneOffset (int64_t dnodeOffset ) {
572+ STzSnapshot snap ;
573+ int64_t nowMs = taosGetTimestampMs ();
574+
575+ mndReadTzSnapshot (& snap );
576+
577+ if (snap .refreshMs == 0 || nowMs - snap .refreshMs >= TZ_CACHE_REFRESH_MS ) {
578+ mndRefreshTzCache (nowMs );
579+ mndReadTzSnapshot (& snap );
580+ }
581+
582+ if (mndMatchTzSnapshot (& snap , dnodeOffset , nowMs ))
583+ return true;
584+
585+ /*
586+ * dnodeOffset doesn't match — force an immediate
587+ * refresh in case we are stale after a DST switch,
588+ * then re-check.
589+ */
590+ mndRefreshTzCache (nowMs );
591+ mndReadTzSnapshot (& snap );
592+
593+ return mndMatchTzSnapshot (& snap , dnodeOffset , nowMs );
594+ }
595+
517596static int32_t mndCheckClusterCfgPara (SMnode * pMnode , SDnodeObj * pDnode , const SClusterCfg * pCfg ) {
518597 CHECK_MONITOR_PARA (tsEnableMonitor , DND_REASON_STATUS_MONITOR_SWITCH_NOT_MATCH );
519598 CHECK_MONITOR_PARA (tsMonitorInterval , DND_REASON_STATUS_MONITOR_INTERVAL_NOT_MATCH );
0 commit comments