@@ -441,139 +441,7 @@ impl IndexingPipeline {
441441 sequencer : sequencer_handle,
442442 publisher : publisher_handle,
443443 next_check_for_progress : Instant :: now ( ) + * HEARTBEAT ,
444- } ) ) ;
445- Ok ( ( ) )
446- }
447-
448- /// Spawn the parquet pipeline using ParquetDocProcessor and ParquetIndexer.
449- ///
450- /// This pipeline routes metrics data through the Parquet/DataFusion path instead of
451- /// the Tantivy path. The output is ParquetSplit files written to the indexing directory.
452- #[ instrument(
453- name="spawn_parquet_pipeline" ,
454- level="info" ,
455- skip_all,
456- fields(
457- index=%self . params. pipeline_id. index_uid. index_id,
458- r#gen=self . generation( )
459- ) ) ]
460- async fn spawn_parquet_pipeline ( & mut self , ctx : & ActorContext < Self > ) -> anyhow:: Result < ( ) > {
461- let index_id = & self . params . pipeline_id . index_uid . index_id ;
462- let source_id = & self . params . pipeline_id . source_id ;
463-
464- info ! (
465- index_id,
466- source_id,
467- pipeline_uid=%self . params. pipeline_id. pipeline_uid,
468- root_dir=%self . params. indexing_directory. path( ) . display( ) ,
469- "spawning parquet indexing pipeline for metrics" ,
470- ) ;
471-
472- let ( source_mailbox, source_inbox) = ctx
473- . spawn_ctx ( )
474- . create_mailbox :: < SourceActor < ParquetDocProcessor > > (
475- "SourceActor" ,
476- QueueCapacity :: Unbounded ,
477- ) ;
478-
479- // ParquetPublisher
480- let parquet_publisher = ParquetPublisher :: new (
481- PublisherType :: ParquetPublisher ,
482- self . params . metastore . clone ( ) ,
483- None ,
484- Some ( source_mailbox. clone ( ) ) ,
485- ) ;
486- let ( parquet_publisher_mailbox, parquet_publisher_handle) = ctx
487- . spawn_actor ( )
488- . set_kill_switch ( self . kill_switch . clone ( ) )
489- . spawn ( parquet_publisher) ;
490-
491- // Sequencer for ordered delivery
492- let parquet_sequencer = Sequencer :: new ( parquet_publisher_mailbox) ;
493- let ( parquet_sequencer_mailbox, parquet_sequencer_handle) = ctx
494- . spawn_actor ( )
495- . set_kill_switch ( self . kill_switch . clone ( ) )
496- . spawn ( parquet_sequencer) ;
497-
498- // ParquetUploader
499- let parquet_uploader = ParquetUploader :: new (
500- super :: UploaderType :: IndexUploader ,
501- self . params . metastore . clone ( ) ,
502- self . params . storage . clone ( ) ,
503- SplitsUpdateMailbox :: Sequencer ( parquet_sequencer_mailbox) ,
504- self . params . max_concurrent_split_uploads_index ,
505- ) ;
506- let ( parquet_uploader_mailbox, parquet_uploader_handle) = ctx
507- . spawn_actor ( )
508- . set_kill_switch ( self . kill_switch . clone ( ) )
509- . spawn ( parquet_uploader) ;
510-
511- // ParquetPackager
512- let writer_config = quickwit_parquet_engine:: storage:: ParquetWriterConfig :: default ( ) ;
513- let table_config = quickwit_parquet_engine:: table_config:: TableConfig :: default ( ) ;
514- let split_writer = quickwit_parquet_engine:: storage:: ParquetSplitWriter :: new (
515- writer_config,
516- self . params . indexing_directory . path ( ) ,
517- & table_config,
518- ) ;
519- let parquet_packager = ParquetPackager :: new ( split_writer, parquet_uploader_mailbox) ;
520- let ( parquet_packager_mailbox, parquet_packager_handle) = ctx
521- . spawn_actor ( )
522- . set_kill_switch ( self . kill_switch . clone ( ) )
523- . spawn ( parquet_packager) ;
524-
525- // ParquetIndexer
526- let commit_timeout =
527- Duration :: from_secs ( self . params . indexing_settings . commit_timeout_secs as u64 ) ;
528- let parquet_indexer = ParquetIndexer :: new (
529- self . params . pipeline_id . index_uid . clone ( ) ,
530- source_id. to_string ( ) ,
531- None ,
532- parquet_packager_mailbox,
533- Some ( commit_timeout) ,
534- ) ;
535- let ( parquet_indexer_mailbox, parquet_indexer_handle) = ctx
536- . spawn_actor ( )
537- . set_kill_switch ( self . kill_switch . clone ( ) )
538- . spawn ( parquet_indexer) ;
539-
540- // ParquetDocProcessor
541- let parquet_doc_processor = ParquetDocProcessor :: new (
542- index_id. to_string ( ) ,
543- source_id. to_string ( ) ,
544- parquet_indexer_mailbox,
545- ) ;
546- let ( parquet_doc_processor_mailbox, parquet_doc_processor_handle) = ctx
547- . spawn_actor ( )
548- . set_kill_switch ( self . kill_switch . clone ( ) )
549- . spawn ( parquet_doc_processor) ;
550-
551- // Source (using parquet source loader)
552- let source_runtime = SourceRuntime {
553- pipeline_id : self . params . pipeline_id . clone ( ) ,
554- source_config : self . params . source_config . clone ( ) ,
555- metastore : self . params . metastore . clone ( ) ,
556- ingester_pool : self . params . ingester_pool . clone ( ) ,
557- queues_dir_path : self . params . queues_dir_path . clone ( ) ,
558- storage_resolver : self . params . source_storage_resolver . clone ( ) ,
559- event_broker : self . params . event_broker . clone ( ) ,
560- indexing_setting : self . params . indexing_settings . clone ( ) ,
561- } ;
562- let source = ctx
563- . protect_future ( quickwit_supported_parquet_sources ( ) . load_source ( source_runtime) )
564- . await ?;
565- let actor_source = SourceActor {
566- source,
567- processor_mailbox : parquet_doc_processor_mailbox,
568- } ;
569- let ( source_mailbox, source_handle) = ctx
570- . spawn_actor ( )
571- . set_mailboxes ( source_mailbox, source_inbox)
572- . set_kill_switch ( self . kill_switch . clone ( ) )
573- . spawn ( actor_source) ;
574- let assign_shards_message = AssignShards ( Assignment {
575- shard_ids : self . shard_ids . clone ( ) ,
576- } ) ) ;
444+ } ) ;
577445 Ok ( ( ) )
578446 }
579447
0 commit comments