@@ -22,72 +22,110 @@ export async function startCluster(
2222 workerFile : string ,
2323 options : ClusterOptions = { }
2424) {
25+ const availableWorkers = typeof os . availableParallelism === 'function'
26+ ? os . availableParallelism ( )
27+ : os . cpus ( ) . length ;
2528 const {
26- workers = os . cpus ( ) . length ,
29+ workers = availableWorkers ,
2730 respawnDelay = 1000 ,
2831 maxRestarts = 5
2932 } = options ;
33+ const workerCount = Math . max ( 1 , Math . min ( workers , availableWorkers ) ) ;
3034
3135 if ( cluster . isPrimary ) {
36+ cluster . schedulingPolicy = cluster . SCHED_RR ;
37+
3238 logger . info ( 'Starting cluster mode' , {
33- workers,
34- cpus : os . cpus ( ) . length ,
39+ workers : workerCount ,
40+ cpus : availableWorkers ,
3541 platform : os . platform ( ) ,
36- memory : `${ Math . round ( os . totalmem ( ) / 1024 / 1024 / 1024 ) } GB`
42+ memory : `${ Math . round ( os . totalmem ( ) / 1024 / 1024 / 1024 ) } GB` ,
43+ schedulingPolicy : 'round-robin'
3744 } ) ;
3845
3946 const workerRestarts = new Map < number , number > ( ) ;
47+ const workerSlots = new Map < number , number > ( ) ;
48+ let isShuttingDown = false ;
49+ let healthCheckInterval : NodeJS . Timeout | undefined ;
4050
4151 // Spawn workers
42- for ( let i = 0 ; i < workers ; i ++ ) {
43- spawnWorker ( i + 1 ) ;
52+ for ( let slot = 1 ; slot <= workerCount ; slot ++ ) {
53+ spawnWorker ( slot , workerSlots ) ;
4454 }
4555
4656 // Handle worker exit
4757 cluster . on ( 'exit' , ( worker , code , signal ) => {
4858 const workerId = worker . id ;
49- const restarts = workerRestarts . get ( workerId ) || 0 ;
59+ const workerSlot = workerSlots . get ( workerId ) || workerId ;
60+ const restarts = workerRestarts . get ( workerSlot ) || 0 ;
61+
62+ workerSlots . delete ( workerId ) ;
5063
5164 logger . warn ( 'Worker died' , {
5265 workerId,
66+ workerSlot,
5367 pid : worker . process . pid ,
5468 code,
5569 signal,
5670 restarts
5771 } ) ;
5872
73+ if ( isShuttingDown ) {
74+ return ;
75+ }
76+
5977 // Check if we should respawn
6078 if ( restarts < maxRestarts ) {
61- workerRestarts . set ( workerId , restarts + 1 ) ;
79+ workerRestarts . set ( workerSlot , restarts + 1 ) ;
6280
6381 setTimeout ( ( ) => {
64- logger . info ( 'Respawning worker' , { workerId, attempt : restarts + 1 } ) ;
65- spawnWorker ( workerId ) ;
82+ if ( isShuttingDown ) {
83+ return ;
84+ }
85+
86+ logger . info ( 'Respawning worker' , { workerId, workerSlot, attempt : restarts + 1 } ) ;
87+ spawnWorker ( workerSlot , workerSlots ) ;
6688 } , respawnDelay ) ;
6789 } else {
68- logger . error ( 'Worker exceeded max restarts' , undefined , { workerId, maxRestarts } ) ;
90+ logger . error ( 'Worker exceeded max restarts' , undefined , {
91+ workerId,
92+ workerSlot,
93+ maxRestarts,
94+ } ) ;
6995 }
7096 } ) ;
7197
7298 // Handle worker online
7399 cluster . on ( 'online' , ( worker ) => {
100+ const workerSlot = workerSlots . get ( worker . id ) || worker . id ;
74101 logger . info ( 'Worker online' , {
75102 workerId : worker . id ,
103+ workerSlot,
76104 pid : worker . process . pid
77105 } ) ;
78106 } ) ;
79107
80108 // Handle worker listening
81109 cluster . on ( 'listening' , ( worker , address ) => {
110+ const workerSlot = workerSlots . get ( worker . id ) || worker . id ;
82111 logger . info ( 'Worker listening' , {
83112 workerId : worker . id ,
113+ workerSlot,
84114 pid : worker . process . pid ,
85115 address : `${ address . address } :${ address . port } `
86116 } ) ;
87117 } ) ;
88118
89119 // Graceful shutdown
90120 const shutdown = async ( ) => {
121+ if ( isShuttingDown ) {
122+ return ;
123+ }
124+
125+ isShuttingDown = true ;
126+ if ( healthCheckInterval ) {
127+ clearInterval ( healthCheckInterval ) ;
128+ }
91129 logger . info ( 'Shutting down cluster...' ) ;
92130
93131 const workers = Object . values ( cluster . workers || { } ) ;
@@ -124,7 +162,7 @@ export async function startCluster(
124162 process . on ( 'SIGINT' , shutdown ) ;
125163
126164 // Performance monitoring
127- setInterval ( ( ) => {
165+ healthCheckInterval = setInterval ( ( ) => {
128166 const workers = Object . values ( cluster . workers || { } ) ;
129167 const activeWorkers = workers . filter ( w => w && ! w . isDead ( ) ) . length ;
130168
@@ -136,22 +174,53 @@ export async function startCluster(
136174 } ) ;
137175 } , 60000 ) ; // Every minute
138176
177+ healthCheckInterval . unref ( ) ;
178+
139179 } else {
140180 // Worker process - import and run the application
141181 try {
142- await import ( workerFile ) ;
182+ const workerModule = await import ( workerFile ) as {
183+ startServer ?: ( ) => Promise < unknown > ;
184+ stopServer ?: ( ) => Promise < void > ;
185+ } ;
186+ let isWorkerShuttingDown = false ;
187+
188+ if ( typeof workerModule . startServer === 'function' ) {
189+ await workerModule . startServer ( ) ;
190+ }
191+
192+ const shutdownWorker = async ( ) => {
193+ if ( isWorkerShuttingDown ) {
194+ return ;
195+ }
196+
197+ isWorkerShuttingDown = true ;
198+ logger . info ( 'Worker received shutdown signal' , {
199+ workerId : cluster . worker ?. id ,
200+ workerSlot : process . env . WORKER_SLOT ,
201+ } ) ;
202+
203+ try {
204+ await workerModule . stopServer ?.( ) ;
205+ } catch ( error ) {
206+ logger . error ( 'Worker failed to shut down cleanly' , error as Error , {
207+ workerId : cluster . worker ?. id ,
208+ workerSlot : process . env . WORKER_SLOT ,
209+ } ) ;
210+ } finally {
211+ process . exit ( 0 ) ;
212+ }
213+ } ;
143214
144215 // Handle shutdown signal from master
145216 process . on ( 'message' , ( msg ) => {
146217 if ( msg === 'shutdown' ) {
147- logger . info ( 'Worker received shutdown signal' , {
148- workerId : cluster . worker ?. id
149- } ) ;
150-
151- // Gracefully close connections
152- process . exit ( 0 ) ;
218+ void shutdownWorker ( ) ;
153219 }
154220 } ) ;
221+
222+ process . once ( 'SIGTERM' , ( ) => void shutdownWorker ( ) ) ;
223+ process . once ( 'SIGINT' , ( ) => void shutdownWorker ( ) ) ;
155224
156225 } catch ( error ) {
157226 logger . error ( 'Worker failed to start' , error as Error , {
@@ -165,9 +234,11 @@ export async function startCluster(
/**
 * Fork a new cluster worker for the given logical slot.
 *
 * The slot number is passed to the child process through the `WORKER_SLOT`
 * environment variable (as a string), and the mapping from the worker's
 * cluster-assigned id to its slot is recorded in `workerSlots`.
 *
 * @param workerSlot - Logical slot number this worker occupies.
 * @param workerSlots - Map from cluster worker id to slot; mutated in place
 *   with an entry for the newly forked worker.
 * @returns The worker object returned by `cluster.fork`.
 */
function spawnWorker(workerSlot: number, workerSlots: Map<number, number>) {
  const worker = cluster.fork({
    WORKER_SLOT: String(workerSlot),
  });

  // `worker.id` is assigned by the cluster module and is deliberately left
  // untouched; the slot is tracked separately, keyed by that id.
  workerSlots.set(worker.id, workerSlot);

  return worker;
}
173244
0 commit comments