33import json
44import logging
55import os
6+ import signal
67from typing import AsyncIterator
78
89import grpc
2324from envoy .service .ext_proc .v3 import external_processor_pb2 as ep
2425from envoy .service .ext_proc .v3 import external_processor_pb2_grpc as ep_grpc
2526from envoy .type .v3 import http_status_pb2 as http_status_pb2
27+ from grpc_health .v1 import health as grpc_health
28+ from grpc_health .v1 import health_pb2 , health_pb2_grpc
2629
2730# ============================================================================
2831# LOGGING CONFIGURATION
@@ -330,108 +333,111 @@ async def Process(
330333 req_body_buf = bytearray ()
331334 resp_body_buf = bytearray ()
332335
333- async for request in request_iterator :
334- # ----------------------------------------------------------------
335- # Request Headers Processing
336- # ----------------------------------------------------------------
337- if request .HasField ("request_headers" ):
338- _headers = request .request_headers .headers
339- yield ep .ProcessingResponse (
340- request_headers = ep .HeadersResponse (
341- response = ep .CommonResponse (
342- header_mutation = ep .HeaderMutation (
343- set_headers = [
344- core .HeaderValueOption (
345- header = core .HeaderValue (
346- key = "x-ext-proc-header" ,
347- raw_value = "hello-from-ext-proc" .encode ("utf-8" ),
348- ),
349- append_action = core .HeaderValueOption .APPEND_IF_EXISTS_OR_ADD ,
350- )
351- ]
336+ try :
337+ async for request in request_iterator :
338+ # ----------------------------------------------------------------
339+ # Request Headers Processing
340+ # ----------------------------------------------------------------
341+ if request .HasField ("request_headers" ):
342+ _headers = request .request_headers .headers
343+ yield ep .ProcessingResponse (
344+ request_headers = ep .HeadersResponse (
345+ response = ep .CommonResponse (
346+ header_mutation = ep .HeaderMutation (
347+ set_headers = [
348+ core .HeaderValueOption (
349+ header = core .HeaderValue (
350+ key = "x-ext-proc-header" ,
351+ raw_value = "hello-from-ext-proc" .encode ("utf-8" ),
352+ ),
353+ append_action = core .HeaderValueOption .APPEND_IF_EXISTS_OR_ADD ,
354+ )
355+ ]
356+ )
352357 )
353358 )
354359 )
355- )
356- # ----------------------------------------------------------------
357- # Response Headers Processing
358- # ----------------------------------------------------------------
359- elif request .HasField ( " response_headers" ):
360- _headers = request . response_headers . headers
361- yield ep .ProcessingResponse (
362- response_headers = ep .HeadersResponse (
363- response = ep .CommonResponse (
364- header_mutation = ep . HeaderMutation (
365- set_headers = [
366- core .HeaderValueOption (
367- header = core . HeaderValue (
368- key = "x- ext-proc-response-header" ,
369- raw_value = "processed-by-ext-proc" . encode ( "utf-8" ),
370- ) ,
371- append_action = core . HeaderValueOption . APPEND_IF_EXISTS_OR_ADD ,
372- )
373- ]
360+ # ----------------------------------------------------------------
361+ # Response Headers Processing
362+ # ----------------------------------------------------------------
363+ elif request . HasField ( "response_headers" ):
364+ _headers = request .response_headers . headers
365+ yield ep . ProcessingResponse (
366+ response_headers = ep .HeadersResponse (
367+ response = ep .CommonResponse (
368+ header_mutation = ep .HeaderMutation (
369+ set_headers = [
370+ core . HeaderValueOption (
371+ header = core .HeaderValue (
372+ key = "x-ext-proc-response-header" ,
373+ raw_value = "processed-by- ext-proc" . encode ( "utf-8" ) ,
374+ ),
375+ append_action = core . HeaderValueOption . APPEND_IF_EXISTS_OR_ADD ,
376+ )
377+ ]
378+ )
374379 )
375380 )
376381 )
377- )
378-
379- # ----------------------------------------------------------------
380- # Request Body Processing (MCP Tool/Prompt Invocations)
381- # ----------------------------------------------------------------
382- elif request .HasField ("request_body" ) and request .request_body .body :
383- chunk = request .request_body .body
384- req_body_buf .extend (chunk )
385-
386- if getattr (request .request_body , "end_of_stream" , False ):
387- try :
388- text = req_body_buf .decode ("utf-8" )
389- except UnicodeDecodeError :
390- logger .debug ("Request body not UTF-8; skipping" )
391- else :
392- logger .info (json .loads (text ))
393- body = json .loads (text )
394- if "method" in body and body ["method" ] == "tools/call" :
395- body_resp = await getToolPreInvokeResponse (body )
396- elif "method" in body and body ["method" ] == "prompts/get" :
397- body_resp = await getPromptPreFetchResponse (body )
382+
383+ # ----------------------------------------------------------------
384+ # Request Body Processing (MCP Tool/Prompt Invocations)
385+ # ----------------------------------------------------------------
386+ elif request .HasField ("request_body" ) and request .request_body .body :
387+ chunk = request .request_body .body
388+ req_body_buf .extend (chunk )
389+
390+ if getattr (request .request_body , "end_of_stream" , False ):
391+ try :
392+ text = req_body_buf .decode ("utf-8" )
393+ except UnicodeDecodeError :
394+ logger .debug ("Request body not UTF-8; skipping" )
398395 else :
399- body_resp = ep .ProcessingResponse (
400- request_body = ep .BodyResponse (response = ep .CommonResponse ())
401- )
396+ logger .info (json .loads (text ))
397+ body = json .loads (text )
398+ if "method" in body and body ["method" ] == "tools/call" :
399+ body_resp = await getToolPreInvokeResponse (body )
400+ elif "method" in body and body ["method" ] == "prompts/get" :
401+ body_resp = await getPromptPreFetchResponse (body )
402+ else :
403+ body_resp = ep .ProcessingResponse (
404+ request_body = ep .BodyResponse (response = ep .CommonResponse ())
405+ )
406+ yield body_resp
407+
408+ req_body_buf .clear ()
409+
410+ # ----------------------------------------------------------------
411+ # Response Body Processing (MCP Tool Results)
412+ # ----------------------------------------------------------------
413+ elif request .HasField ("response_body" ):
414+ logger .debug (f"Processing response body: { request } " )
415+
416+ # Buffer content if present in this chunk
417+ if request .response_body .body :
418+ chunk = request .response_body .body
419+ resp_body_buf .extend (chunk )
420+ logger .debug (f"Buffered chunk ({ len (chunk )} bytes)" )
421+
422+ # Check for end of stream (regardless of whether this chunk has content)
423+ if getattr (request .response_body , "end_of_stream" , False ):
424+ logger .debug ("End of stream reached, processing complete buffered response" )
425+
426+ # Process the buffered content
427+ body_resp = await process_response_body_buffer (resp_body_buf )
402428 yield body_resp
429+ resp_body_buf .clear ()
430+ else :
431+ # Intermediate chunk - acknowledge but don't process yet
432+ logger .debug ("Buffering intermediate chunk, waiting for end_of_stream" )
433+ yield ep .ProcessingResponse (response_body = ep .BodyResponse (response = ep .CommonResponse ()))
403434
404- req_body_buf .clear ()
405-
406- # ----------------------------------------------------------------
407- # Response Body Processing (MCP Tool Results)
408- # ----------------------------------------------------------------
409- elif request .HasField ("response_body" ):
410- logger .debug (f"Processing response body: { request } " )
411-
412- # Buffer content if present in this chunk
413- if request .response_body .body :
414- chunk = request .response_body .body
415- resp_body_buf .extend (chunk )
416- logger .debug (f"Buffered chunk ({ len (chunk )} bytes)" )
417-
418- # Check for end of stream (regardless of whether this chunk has content)
419- if getattr (request .response_body , "end_of_stream" , False ):
420- logger .debug ("End of stream reached, processing complete buffered response" )
421-
422- # Process the buffered content
423- body_resp = await process_response_body_buffer (resp_body_buf )
424- yield body_resp
425- resp_body_buf .clear ()
426435 else :
427- # Intermediate chunk - acknowledge but don't process yet
428- logger .debug ("Buffering intermediate chunk, waiting for end_of_stream" )
429- yield ep .ProcessingResponse (response_body = ep .BodyResponse (response = ep .CommonResponse ()))
430-
431- else :
432- # Unhandled request types
433- logger .warning ("Not processed" )
434- logger .warning (request )
436+ # Unhandled request types
437+ logger .warning ("Not processed" )
438+ logger .warning (request )
439+ except asyncio .CancelledError :
440+ logger .info ("Process stream cancelled (client disconnect or pod rollover)" )
435441
436442
437443# ============================================================================
@@ -452,13 +458,31 @@ async def serve(host: str = "0.0.0.0", port: int = 50052):
452458 logger .debug (f"Loaded { manager .plugin_count } plugins" )
453459
454460 server = grpc .aio .server ()
455- # server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
456461 ep_grpc .add_ExternalProcessorServicer_to_server (ExtProcServicer (), server )
462+
463+ # Register gRPC health check service for Kubernetes readiness/liveness probes
464+ health_servicer = grpc_health .HealthServicer ()
465+ health_pb2_grpc .add_HealthServicer_to_server (health_servicer , server )
466+
457467 listen_addr = f"{ host } :{ port } "
458468 server .add_insecure_port (listen_addr )
459- logger .info ("Starting ext_proc MY server on %s" , listen_addr )
469+ logger .info ("Starting ext_proc server on %s" , listen_addr )
460470 await server .start ()
461- # wait forever
471+
472+ # Mark server as healthy after startup
473+ health_servicer .set ("" , health_pb2 .HealthCheckResponse .SERVING )
474+
475+ # Install SIGTERM handler for graceful drain on pod rollover
476+ loop = asyncio .get_running_loop ()
477+
478+ async def _shutdown ():
479+ logger .info ("SIGTERM received — draining in-flight streams (grace=15s)" )
480+ health_servicer .set ("" , health_pb2 .HealthCheckResponse .NOT_SERVING )
481+ await server .stop (grace = 15 )
482+
483+ loop .add_signal_handler (signal .SIGTERM , lambda : asyncio .ensure_future (_shutdown ()))
484+ logger .info ("SIGTERM handler registered; waiting for termination" )
485+
462486 await server .wait_for_termination ()
463487
464488
0 commit comments