Skip to content

Commit b4b0a37

Browse files
fix(cli): fix tool metric model in cli eval (#276)
* fix(cli): fix tool metric model in cli eval
* fix tracer bugs
1 parent 993a9f6 commit b4b0a37

3 files changed

Lines changed: 12 additions & 5 deletions

File tree

veadk/agent.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,10 @@
3333
DEFAULT_MODEL_EXTRA_CONFIG,
3434
)
3535
from veadk.evaluation import EvalSetRecorder
36-
from veadk.processors import BaseRunProcessor, NoOpRunProcessor
3736
from veadk.knowledgebase import KnowledgeBase
3837
from veadk.memory.long_term_memory import LongTermMemory
3938
from veadk.memory.short_term_memory import ShortTermMemory
39+
from veadk.processors import BaseRunProcessor, NoOpRunProcessor
4040
from veadk.prompts.agent_default_prompt import DEFAULT_DESCRIPTION, DEFAULT_INSTRUCTION
4141
from veadk.tracing.base_tracer import BaseTracer
4242
from veadk.utils.logger import get_logger
@@ -298,6 +298,10 @@ def _prepare_tracers(self):
298298
enable_cozeloop_tracer = os.getenv("ENABLE_COZELOOP", "false").lower() == "true"
299299
enable_tls_tracer = os.getenv("ENABLE_TLS", "false").lower() == "true"
300300

301+
if not (enable_apmplus_tracer or enable_cozeloop_tracer or enable_tls_tracer):
302+
logger.info("No exporter enabled by env, skip prepare tracers.")
303+
return
304+
301305
if not self.tracers:
302306
from veadk.tracing.telemetry.opentelemetry_tracer import OpentelemetryTracer
303307

veadk/cli/cli_eval.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ def eval(
205205
],
206206
model=judge_model,
207207
),
208-
ToolCorrectnessMetric(threshold=0.5),
208+
ToolCorrectnessMetric(threshold=0.5, model=judge_model),
209209
]
210210

211211
asyncio.run(

veadk/evaluation/deepeval_evaluator/deepeval_evaluator.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414

1515
import time
16+
from typing import Optional
1617

1718
from deepeval import evaluate
1819
from deepeval.evaluate import CacheConfig
@@ -21,9 +22,9 @@
2122
from deepeval.models import LocalModel
2223
from deepeval.test_case import LLMTestCase
2324
from deepeval.test_case.llm_test_case import ToolCall
24-
from typing_extensions import override
25-
from typing import Optional
2625
from google.adk.evaluation.eval_set import EvalSet
26+
from typing_extensions import override
27+
2728
from veadk.config import getenv
2829
from veadk.evaluation.base_evaluator import BaseEvaluator, EvalResultData, MetricResult
2930
from veadk.evaluation.types import EvalResultCaseData, EvalResultMetadata
@@ -113,7 +114,9 @@ def __init__(
113114
super().__init__(agent=agent, name=name)
114115

115116
if not judge_model_api_key:
116-
judge_model_api_key = getenv("MODEL_JUDGE_API_KEY")
117+
judge_model_api_key = getenv("MODEL_JUDGE_API_KEY") or getenv(
118+
"MODEL_AGENT_API_KEY"
119+
)
117120
if not judge_model_name:
118121
judge_model_name = getenv(
119122
"MODEL_JUDGE_NAME",

0 commit comments

Comments
 (0)