@@ -477,43 +477,121 @@ int32_t mndGetDnodeData(SMnode *pMnode, SArray *pDnodeInfo) {
477477 * Keep both the previous and current cached offsets so
478478 * that during a DST transition window dnodes reporting
479479 * either the old or the new offset are both accepted.
480- * Refresh at most once per 60 s.
480+ * Refresh at most once per 60 s; the previous offset
481+ * is only accepted for a short grace period (3 min)
482+ * to avoid permanently weakening the check.
481483 */
/* Minimum interval between two refreshes of the cached local offset. */
#define TZ_CACHE_REFRESH_MS 60000
/* How long the previous offset stays acceptable after the offset changed. */
#define TZ_PREV_GRACE_MS 180000

/*
 * Cached timezone state. All fields are published together under the
 * sequence counter tsTzSeq declared below.
 */
static int64_t tsCachedTzOffset = 0;     /* most recently looked-up local offset */
static int64_t tsCachedTzOffsetPrev = 0; /* offset that was cached before the last change */
static int64_t tsCachedTzOffsetMs = 0;   /* time (ms) of the last successful refresh; 0 = never */
static int64_t tsCachedPrevSetMs = 0;    /* time (ms) at which tsCachedTzOffsetPrev was recorded */
static int8_t  tsCachedHasPrev = 0;      /* non-zero once a previous offset has been recorded */

/*
 * Seqlock sequence counter: an even value means the fields above are stable,
 * an odd value means a write is in progress. A writer claims exclusive access
 * with a compare-and-swap from even to odd, and publishes the new state by
 * storing the next even value (old + 2).
 */
static int64_t tsTzSeq = 0;

/* Private copy of the cached timezone state, taken by a reader. */
typedef struct {
  int64_t offset;     /* copy of tsCachedTzOffset */
  int64_t offsetPrev; /* copy of tsCachedTzOffsetPrev */
  int64_t refreshMs;  /* copy of tsCachedTzOffsetMs */
  int64_t prevSetMs;  /* copy of tsCachedPrevSetMs */
  int8_t  hasPrev;    /* copy of tsCachedHasPrev */
} STzSnapshot;
487505
488- static bool mndCheckTimezoneOffset (int64_t dnodeOffset ) {
489- int64_t nowMs = taosGetTimestampMs ();
490-
491- if (tsCachedTzOffsetMs == 0 ||
492- nowMs - tsCachedTzOffsetMs >= TZ_CACHE_REFRESH_MS ) {
493- int32_t ret = taosGetLocalTimezoneOffset ();
494- if (ret == TSDB_CODE_TIME_ERROR ) {
495- mError ("failed to get local timezone offset since %s" ,
496- tstrerror (ret ));
497- terrno = ret ;
498- return false;
499- }
500- int64_t offset = (int64_t )ret ;
506+ /*
507+ * Read a consistent snapshot of the tz cache.
508+ * Spins while a writer is active (odd seq) or
509+ * if the snapshot was torn (seq changed).
510+ */
511+ static void mndReadTzSnapshot (STzSnapshot * s ) {
512+ int64_t seq ;
513+ do {
514+ seq = atomic_load_64 (& tsTzSeq );
515+ if (seq & 1 ) continue ;
516+ s -> offset = atomic_load_64 (& tsCachedTzOffset );
517+ s -> offsetPrev = atomic_load_64 (& tsCachedTzOffsetPrev );
518+ s -> refreshMs = atomic_load_64 (& tsCachedTzOffsetMs );
519+ s -> prevSetMs = atomic_load_64 (& tsCachedPrevSetMs );
520+ s -> hasPrev = atomic_load_8 (& tsCachedHasPrev );
521+ } while (atomic_load_64 (& tsTzSeq ) != seq );
522+ }
501523
502- if (tsCachedTzOffsetMs != 0 && offset != tsCachedTzOffset ) {
503- /* offset changed (DST edge) — keep the old one */
504- tsCachedTzOffsetPrev = tsCachedTzOffset ;
505- tsCachedHasPrev = true;
506- }
507- tsCachedTzOffset = offset ;
508- tsCachedTzOffsetMs = nowMs ;
524+ /*
525+ * Try to refresh the tz cache. Uses CAS on tsTzSeq
526+ * to ensure single-writer; if another thread is
527+ * already refreshing, this is a harmless no-op.
528+ */
529+ static void mndRefreshTzCache (int64_t nowMs ) {
530+ int64_t seq = atomic_load_64 (& tsTzSeq );
531+ if (seq & 1 ) return ;
532+ if (atomic_val_compare_exchange_64 (& tsTzSeq , seq , seq + 1 ) != seq ) {
533+ return ;
509534 }
510535
511- if (dnodeOffset == tsCachedTzOffset ) return true;
512- if (tsCachedHasPrev && dnodeOffset == tsCachedTzOffsetPrev ) return true;
536+ /* seq is now odd — we are the sole writer */
537+ int64_t offset = (int64_t )taosGetLocalTimezoneOffset ();
538+ if (offset == TSDB_CODE_TIME_ERROR ) {
539+ mError ("failed to get local timezone offset since %s" , tstrerror (offset ));
540+ /* rollback: restore even seq */
541+ atomic_store_64 (& tsTzSeq , seq );
542+ return ;
543+ }
513544
545+ int64_t oldMs = atomic_load_64 (& tsCachedTzOffsetMs );
546+ int64_t oldOff = atomic_load_64 (& tsCachedTzOffset );
547+ if (oldMs != 0 && offset != oldOff ) {
548+ /* offset changed (DST edge) — keep old one */
549+ atomic_store_64 (& tsCachedTzOffsetPrev , oldOff );
550+ atomic_store_64 (& tsCachedPrevSetMs , nowMs );
551+ atomic_store_8 (& tsCachedHasPrev , 1 );
552+ }
553+ atomic_store_64 (& tsCachedTzOffset , offset );
554+ atomic_store_64 (& tsCachedTzOffsetMs , nowMs );
555+
556+ /* publish: even seq+2 => readers see new state */
557+ atomic_store_64 (& tsTzSeq , seq + 2 );
558+ }
559+
560+ static bool mndMatchTzSnapshot (const STzSnapshot * s , int64_t dnodeOff ,
561+ int64_t nowMs ) {
562+ if (dnodeOff == s -> offset ) return true;
563+ if (s -> hasPrev && nowMs - s -> prevSetMs < TZ_PREV_GRACE_MS &&
564+ dnodeOff == s -> offsetPrev ) {
565+ return true;
566+ }
514567 return false;
515568}
516569
570+ static bool mndCheckTimezoneOffset (int64_t dnodeOffset ) {
571+ STzSnapshot snap ;
572+ int64_t nowMs = taosGetTimestampMs ();
573+
574+ mndReadTzSnapshot (& snap );
575+
576+ if (snap .refreshMs == 0 || nowMs - snap .refreshMs >= TZ_CACHE_REFRESH_MS ) {
577+ mndRefreshTzCache (nowMs );
578+ mndReadTzSnapshot (& snap );
579+ }
580+
581+ if (mndMatchTzSnapshot (& snap , dnodeOffset , nowMs ))
582+ return true;
583+
584+ /*
585+ * dnodeOffset doesn't match — force an immediate
586+ * refresh in case we are stale after a DST switch,
587+ * then re-check.
588+ */
589+ mndRefreshTzCache (nowMs );
590+ mndReadTzSnapshot (& snap );
591+
592+ return mndMatchTzSnapshot (& snap , dnodeOffset , nowMs );
593+ }
594+
517595static int32_t mndCheckClusterCfgPara (SMnode * pMnode , SDnodeObj * pDnode , const SClusterCfg * pCfg ) {
518596 CHECK_MONITOR_PARA (tsEnableMonitor , DND_REASON_STATUS_MONITOR_SWITCH_NOT_MATCH );
519597 CHECK_MONITOR_PARA (tsMonitorInterval , DND_REASON_STATUS_MONITOR_INTERVAL_NOT_MATCH );
0 commit comments