@@ -14,7 +14,14 @@ use serde::{Deserialize, Serialize};
1414
1515#[ derive( Debug , Clone , Serialize , Deserialize ) ]
1616pub struct JobOptions {
17+ /// Maximum number of active vertices processed in a single superstep.
1718 pub max_active_per_step : usize ,
19+ /// Maximum number of messages carried by a single `OutboundBatch` for one
20+ /// destination server.
21+ ///
22+ /// When a server receives more emitted messages than this in one
23+ /// superstep, the worker splits them into multiple `OutboundBatch`
24+ /// records. Values less than 1 are clamped to 1 by the worker.
1825 pub message_batch_size : usize ,
1926}
2027
@@ -37,6 +44,8 @@ pub struct SuperstepReport {
3744 pub active_vertices : usize ,
3845 pub messages_sent : usize ,
3946 pub bytes_sent : usize ,
47+ /// Number of outbound `OutboundBatch` chunks produced after grouping by
48+ /// destination server and splitting by `JobOptions::message_batch_size`.
4049 pub batches_sent : usize ,
4150 pub misrouted : usize ,
4251}
@@ -50,6 +59,11 @@ pub struct DeliverReport {
5059#[ derive( Debug , Default , Clone , Serialize , Deserialize ) ]
5160pub struct SuperstepOutput {
5261 pub report : SuperstepReport ,
62+ /// Outbound message chunks ready for delivery.
63+ ///
64+ /// There may be multiple entries for the same `server_id` when one
65+ /// superstep emits more than `JobOptions::message_batch_size` messages to
66+ /// that destination server.
5367 pub outbound : Vec < OutboundBatch > ,
5468}
5569
@@ -497,16 +511,36 @@ where
497511 . collect ( ) ;
498512 self . retry_adjacency . extend ( reduced. retry_adjacency . iter ( ) . copied ( ) ) ;
499513
514+ let batch_size = self . options . message_batch_size . max ( 1 ) ;
500515 let outbound: Vec < OutboundBatch > = reduced
501516 . outbound
502517 . into_iter ( )
503- . map ( |( server_id, messages) | {
518+ . flat_map ( |( server_id, messages) | {
504519 report. messages_sent += messages. len ( ) ;
505520 report. bytes_sent += messages. iter ( ) . map ( |( _, m) | m. len ( ) ) . sum :: < usize > ( ) ;
506- OutboundBatch {
507- server_id,
508- messages,
521+
522+ let mut batches = Vec :: new ( ) ;
523+ let mut current = Vec :: with_capacity ( batch_size. min ( messages. len ( ) ) ) ;
524+
525+ for message in messages {
526+ current. push ( message) ;
527+ if current. len ( ) == batch_size {
528+ batches. push ( OutboundBatch {
529+ server_id,
530+ messages : current,
531+ } ) ;
532+ current = Vec :: with_capacity ( batch_size) ;
533+ }
509534 }
535+
536+ if !current. is_empty ( ) {
537+ batches. push ( OutboundBatch {
538+ server_id,
539+ messages : current,
540+ } ) ;
541+ }
542+
543+ batches
510544 } )
511545 . collect ( ) ;
512546 report. batches_sent = outbound. len ( ) ;
0 commit comments