🥅 Handle cancellation (#94)

evaline-ju · web-flow · commit cd7cb80f65ab · 2026-04-09T11:47:15.000-06:00
* 💡 Update test comments on CI

Signed-off-by: Evaline Ju <69598118+evaline-ju@users.noreply.github.com>

* ⏪ Put back CI comment

Signed-off-by: Evaline Ju <69598118+evaline-ju@users.noreply.github.com>

* ♻️ Separate integration tests

Signed-off-by: Evaline Ju <69598118+evaline-ju@users.noreply.github.com>

* 🥅 Handle cancellations

Signed-off-by: Evaline Ju <69598118+evaline-ju@users.noreply.github.com>

* ✅ Add tests for shutdown

Signed-off-by: Evaline Ju <69598118+evaline-ju@users.noreply.github.com>

* 🔧 Configure probes

Signed-off-by: Evaline Ju <69598118+evaline-ju@users.noreply.github.com>

* ⚡ Cache envoy protos for integration tests

Signed-off-by: Evaline Ju <69598118+evaline-ju@users.noreply.github.com>

---------

Signed-off-by: Evaline Ju <69598118+evaline-ju@users.noreply.github.com>
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -85,8 +85,26 @@ jobs:
       - name: Install uv
         run: pip install uv
 
+      # Cache compiled protobuf files across CI runs
+      - name: Extract proto commit hash
+        id: proto-hash
+        run: |
+          echo "hash=$(grep 'ENVOY_DATA_PLANE_COMMIT=' proto-build.sh | cut -d'"' -f2)" >> "$GITHUB_OUTPUT"
+
+      - name: Cache protobuf files
+        id: proto-cache
+        uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7  # v5.0.4
+        with:
+          path: |
+            src/envoy
+            src/xds
+            src/validate
+            src/udpa
+          key: protos-${{ steps.proto-hash.outputs.hash }}
+
       # Build generated protos (gitignored, needed for real envoy imports)
       - name: Build protobuf files
+        if: steps.proto-cache.outputs.cache-hit != 'true'
         run: |
           uv sync --group proto
           USE_HTTPS=true ./proto-build.sh
diff --git a/ext-proc.yaml b/ext-proc.yaml
@@ -27,6 +27,8 @@ spec:
       labels:
         app: plugins-adapter
     spec:
+      # Allow 35s for graceful shutdown: 5s preStop + 15s gRPC drain + margin
+      terminationGracePeriodSeconds: 35
       securityContext:
         runAsNonRoot: true
         runAsUser: 1000
@@ -67,3 +69,23 @@ spec:
               value: "./"
           ports:
             - containerPort: 50052
+          lifecycle:
+            preStop:
+              exec:
+                # Delay SIGTERM so Envoy/Istio can remove this pod from
+                # its upstream list before we start draining streams.
+                command: ["/bin/sleep", "5"]
+          # gRPC health probes rely on the grpc-health-checking service
+          # registered in serve()
+          readinessProbe:
+            grpc:
+              port: 50052
+            initialDelaySeconds: 5
+            periodSeconds: 10
+            failureThreshold: 3
+          livenessProbe:
+            grpc:
+              port: 50052
+            initialDelaySeconds: 10
+            periodSeconds: 30
+            failureThreshold: 3
diff --git a/pyproject.toml b/pyproject.toml
@@ -6,6 +6,7 @@ requires-python = ">=3.11"
 dependencies = [
     "grpcio>=1.78.0",
     "grpcio-tools>=1.78.0",
+    "grpcio-health-checking>=1.78.0",
     "betterproto2==0.9.1",
     "cpex==0.1.0.dev10",
 ]
diff --git a/src/server.py b/src/server.py
@@ -3,6 +3,7 @@
 import json
 import logging
 import os
+import signal
 from typing import AsyncIterator
 
 import grpc
@@ -23,6 +24,8 @@
 from envoy.service.ext_proc.v3 import external_processor_pb2 as ep
 from envoy.service.ext_proc.v3 import external_processor_pb2_grpc as ep_grpc
 from envoy.type.v3 import http_status_pb2 as http_status_pb2
+from grpc_health.v1 import health as grpc_health
+from grpc_health.v1 import health_pb2, health_pb2_grpc
 
 # ============================================================================
 # LOGGING CONFIGURATION
@@ -330,108 +333,111 @@ async def Process(
         req_body_buf = bytearray()
         resp_body_buf = bytearray()
 
-        async for request in request_iterator:
-            # ----------------------------------------------------------------
-            # Request Headers Processing
-            # ----------------------------------------------------------------
-            if request.HasField("request_headers"):
-                _headers = request.request_headers.headers
-                yield ep.ProcessingResponse(
-                    request_headers=ep.HeadersResponse(
-                        response=ep.CommonResponse(
-                            header_mutation=ep.HeaderMutation(
-                                set_headers=[
-                                    core.HeaderValueOption(
-                                        header=core.HeaderValue(
-                                            key="x-ext-proc-header",
-                                            raw_value="hello-from-ext-proc".encode("utf-8"),
-                                        ),
-                                        append_action=core.HeaderValueOption.APPEND_IF_EXISTS_OR_ADD,
-                                    )
-                                ]
+        try:
+            async for request in request_iterator:
+                # ----------------------------------------------------------------
+                # Request Headers Processing
+                # ----------------------------------------------------------------
+                if request.HasField("request_headers"):
+                    _headers = request.request_headers.headers
+                    yield ep.ProcessingResponse(
+                        request_headers=ep.HeadersResponse(
+                            response=ep.CommonResponse(
+                                header_mutation=ep.HeaderMutation(
+                                    set_headers=[
+                                        core.HeaderValueOption(
+                                            header=core.HeaderValue(
+                                                key="x-ext-proc-header",
+                                                raw_value="hello-from-ext-proc".encode("utf-8"),
+                                            ),
+                                            append_action=core.HeaderValueOption.APPEND_IF_EXISTS_OR_ADD,
+                                        )
+                                    ]
+                                )
                             )
                         )
                     )
-                )
-            # ----------------------------------------------------------------
-            # Response Headers Processing
-            # ----------------------------------------------------------------
-            elif request.HasField("response_headers"):
-                _headers = request.response_headers.headers
-                yield ep.ProcessingResponse(
-                    response_headers=ep.HeadersResponse(
-                        response=ep.CommonResponse(
-                            header_mutation=ep.HeaderMutation(
-                                set_headers=[
-                                    core.HeaderValueOption(
-                                        header=core.HeaderValue(
-                                            key="x-ext-proc-response-header",
-                                            raw_value="processed-by-ext-proc".encode("utf-8"),
-                                        ),
-                                        append_action=core.HeaderValueOption.APPEND_IF_EXISTS_OR_ADD,
-                                    )
-                                ]
+                # ----------------------------------------------------------------
+                # Response Headers Processing
+                # ----------------------------------------------------------------
+                elif request.HasField("response_headers"):
+                    _headers = request.response_headers.headers
+                    yield ep.ProcessingResponse(
+                        response_headers=ep.HeadersResponse(
+                            response=ep.CommonResponse(
+                                header_mutation=ep.HeaderMutation(
+                                    set_headers=[
+                                        core.HeaderValueOption(
+                                            header=core.HeaderValue(
+                                                key="x-ext-proc-response-header",
+                                                raw_value="processed-by-ext-proc".encode("utf-8"),
+                                            ),
+                                            append_action=core.HeaderValueOption.APPEND_IF_EXISTS_OR_ADD,
+                                        )
+                                    ]
+                                )
                             )
                         )
                     )
-                )
-
-            # ----------------------------------------------------------------
-            # Request Body Processing (MCP Tool/Prompt Invocations)
-            # ----------------------------------------------------------------
-            elif request.HasField("request_body") and request.request_body.body:
-                chunk = request.request_body.body
-                req_body_buf.extend(chunk)
-
-                if getattr(request.request_body, "end_of_stream", False):
-                    try:
-                        text = req_body_buf.decode("utf-8")
-                    except UnicodeDecodeError:
-                        logger.debug("Request body not UTF-8; skipping")
-                    else:
-                        logger.info(json.loads(text))
-                        body = json.loads(text)
-                        if "method" in body and body["method"] == "tools/call":
-                            body_resp = await getToolPreInvokeResponse(body)
-                        elif "method" in body and body["method"] == "prompts/get":
-                            body_resp = await getPromptPreFetchResponse(body)
+
+                # ----------------------------------------------------------------
+                # Request Body Processing (MCP Tool/Prompt Invocations)
+                # ----------------------------------------------------------------
+                elif request.HasField("request_body") and request.request_body.body:
+                    chunk = request.request_body.body
+                    req_body_buf.extend(chunk)
+
+                    if getattr(request.request_body, "end_of_stream", False):
+                        try:
+                            text = req_body_buf.decode("utf-8")
+                        except UnicodeDecodeError:
+                            logger.debug("Request body not UTF-8; skipping")
                         else:
-                            body_resp = ep.ProcessingResponse(
-                                request_body=ep.BodyResponse(response=ep.CommonResponse())
-                            )
+                            logger.info(json.loads(text))
+                            body = json.loads(text)
+                            if "method" in body and body["method"] == "tools/call":
+                                body_resp = await getToolPreInvokeResponse(body)
+                            elif "method" in body and body["method"] == "prompts/get":
+                                body_resp = await getPromptPreFetchResponse(body)
+                            else:
+                                body_resp = ep.ProcessingResponse(
+                                    request_body=ep.BodyResponse(response=ep.CommonResponse())
+                                )
+                            yield body_resp
+
+                        req_body_buf.clear()
+
+                # ----------------------------------------------------------------
+                # Response Body Processing (MCP Tool Results)
+                # ----------------------------------------------------------------
+                elif request.HasField("response_body"):
+                    logger.debug(f"Processing response body: {request}")
+
+                    # Buffer content if present in this chunk
+                    if request.response_body.body:
+                        chunk = request.response_body.body
+                        resp_body_buf.extend(chunk)
+                        logger.debug(f"Buffered chunk ({len(chunk)} bytes)")
+
+                    # Check for end of stream (regardless of whether this chunk has content)
+                    if getattr(request.response_body, "end_of_stream", False):
+                        logger.debug("End of stream reached, processing complete buffered response")
+
+                        # Process the buffered content
+                        body_resp = await process_response_body_buffer(resp_body_buf)
                         yield body_resp
+                        resp_body_buf.clear()
+                    else:
+                        # Intermediate chunk - acknowledge but don't process yet
+                        logger.debug("Buffering intermediate chunk, waiting for end_of_stream")
+                        yield ep.ProcessingResponse(response_body=ep.BodyResponse(response=ep.CommonResponse()))
 
-                    req_body_buf.clear()
-
-            # ----------------------------------------------------------------
-            # Response Body Processing (MCP Tool Results)
-            # ----------------------------------------------------------------
-            elif request.HasField("response_body"):
-                logger.debug(f"Processing response body: {request}")
-
-                # Buffer content if present in this chunk
-                if request.response_body.body:
-                    chunk = request.response_body.body
-                    resp_body_buf.extend(chunk)
-                    logger.debug(f"Buffered chunk ({len(chunk)} bytes)")
-
-                # Check for end of stream (regardless of whether this chunk has content)
-                if getattr(request.response_body, "end_of_stream", False):
-                    logger.debug("End of stream reached, processing complete buffered response")
-
-                    # Process the buffered content
-                    body_resp = await process_response_body_buffer(resp_body_buf)
-                    yield body_resp
-                    resp_body_buf.clear()
                 else:
-                    # Intermediate chunk - acknowledge but don't process yet
-                    logger.debug("Buffering intermediate chunk, waiting for end_of_stream")
-                    yield ep.ProcessingResponse(response_body=ep.BodyResponse(response=ep.CommonResponse()))
-
-            else:
-                # Unhandled request types
-                logger.warning("Not processed")
-                logger.warning(request)
+                    # Unhandled request types
+                    logger.warning("Not processed")
+                    logger.warning(request)
+        except asyncio.CancelledError:
+            logger.info("Process stream cancelled (client disconnect or pod rollover)")
 
 
 # ============================================================================
@@ -452,13 +458,31 @@ async def serve(host: str = "0.0.0.0", port: int = 50052):
     logger.debug(f"Loaded {manager.plugin_count} plugins")
 
     server = grpc.aio.server()
-    # server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
     ep_grpc.add_ExternalProcessorServicer_to_server(ExtProcServicer(), server)
+
+    # Register gRPC health check service for Kubernetes readiness/liveness probes
+    health_servicer = grpc_health.HealthServicer()
+    health_pb2_grpc.add_HealthServicer_to_server(health_servicer, server)
+
     listen_addr = f"{host}:{port}"
     server.add_insecure_port(listen_addr)
-    logger.info("Starting ext_proc MY server on %s", listen_addr)
+    logger.info("Starting ext_proc server on %s", listen_addr)
     await server.start()
-    # wait forever
+
+    # Mark server as healthy after startup
+    health_servicer.set("", health_pb2.HealthCheckResponse.SERVING)
+
+    # Install SIGTERM handler for graceful drain on pod rollover
+    loop = asyncio.get_running_loop()
+
+    async def _shutdown():
+        logger.info("SIGTERM received — draining in-flight streams (grace=15s)")
+        health_servicer.set("", health_pb2.HealthCheckResponse.NOT_SERVING)
+        await server.stop(grace=15)
+
+    loop.add_signal_handler(signal.SIGTERM, lambda: asyncio.ensure_future(_shutdown()))
+    logger.info("SIGTERM handler registered; waiting for termination")
+
     await server.wait_for_termination()
 
 
diff --git a/tests/integration/test_ext_proc_e2e.py b/tests/integration/test_ext_proc_e2e.py
@@ -4,6 +4,7 @@
 and exercise the full request/response flow.
 """
 
+import asyncio
 import json
 
 import pytest
@@ -215,3 +216,38 @@ async def test_response_body_tool_result_blocked(grpc_stub):
         assert "Blocked by test" in error_body["error"]["message"]
     finally:
         PassthroughPlugin.reset()
+
+
+# ---------------------------------------------------------------------------
+# Stream Cancellation (simulates pod rollover)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_stream_cancel_does_not_crash_server(grpc_stub):
+    """Cancelling a bidi stream mid-flight should not crash the server.
+
+    After cancellation, a subsequent request should still succeed,
+    confirming the server is still healthy.
+    """
+    # Open a stream and cancel it without finishing
+    call = grpc_stub.Process()
+    request = ep.ProcessingRequest(
+        request_headers=ep.HttpHeaders(
+            headers=core.HeaderMap(headers=[]),
+        )
+    )
+    await call.write(request)
+    call.cancel()
+
+    # Small delay for the server to process the cancellation
+    await asyncio.sleep(0.1)
+
+    # Verify the server is still operational with a normal request
+    follow_up = ep.ProcessingRequest(
+        request_headers=ep.HttpHeaders(
+            headers=core.HeaderMap(headers=[]),
+        )
+    )
+    response = await send_one(grpc_stub, follow_up)
+    assert response.HasField("request_headers")
diff --git a/tests/test_graceful_shutdown.py b/tests/test_graceful_shutdown.py

Original file line number	Diff line number	Diff line change
`@@ -6,6 +6,7 @@ requires-python = ">=3.11"`
`6`	`6`	`dependencies = [`
`7`	`7`	`"grpcio>=1.78.0",`
`8`	`8`	`"grpcio-tools>=1.78.0",`
	`9`	`+ "grpcio-health-checking>=1.78.0",`
`9`	`10`	`"betterproto2==0.9.1",`
`10`	`11`	`"cpex==0.1.0.dev10",`
`11`	`12`	`]`