-
Notifications
You must be signed in to change notification settings - Fork 317
Expand file tree
/
Copy path22_agents_judge_critic.py
More file actions
102 lines (79 loc) · 3.29 KB
/
22_agents_judge_critic.py
File metadata and controls
102 lines (79 loc) · 3.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
"""
This example shows the LLM as a judge pattern. The first agent generates a stock summary
from the research notes and the second agent evaluates the summary. The first agent is asked
to continually improve the summary until the evaluator gives a pass.
`3_research_notes.txt` is the text file generated by our previous section where our multi-agent
orchestration pattern is demonstrated.
Usage:
python 22_agents_judge_critic.py
🤖: What company are you interested in?
👧: bbca
"""
from dotenv import load_dotenv
import asyncio
from dataclasses import dataclass
from typing import Literal
from agents import Agent, ItemHelpers, Runner, TResponseInputItem, trace, function_tool
load_dotenv()  # load API credentials (e.g. OPENAI_API_KEY) from a local .env file
@function_tool
def read_company_data_from_txt() -> str:
    """
    Read company research notes from the text file ``3_research_notes.txt``.

    Returns:
        The full file contents on success, or a human-readable error
        message if the file is missing or cannot be read. Errors are
        returned as strings (not raised) so the agent can react to them.
    """
    try:
        # Explicit encoding avoids platform-dependent default decoding.
        with open("3_research_notes.txt", "r", encoding="utf-8") as file:
            data = file.read()
        print(data)  # echo the raw notes to the console for demo visibility
        return data
    except FileNotFoundError:
        return "File not found. Please ensure the file exists."
    except Exception as e:
        # Best-effort tool: surface the error text to the agent instead of crashing.
        return str(e)
# NOTE(review): this assignment shadows the @function_tool of the same name
# defined above. It only works because the tools=[...] list on the right-hand
# side is evaluated before the name is rebound; after this statement,
# `read_company_data_from_txt` refers to the Agent, not the tool. Renaming the
# agent (e.g. to `summary_agent`) together with its use in main() would make
# this module much clearer.
read_company_data_from_txt = Agent(
    # Summarizer agent: reads the research notes via its tool, drafts a short
    # professional report, and revises it whenever feedback is appended.
    name="read_company_data_from_txt",
    instructions=(
        # Trailing spaces keep the implicitly-concatenated sentences separated
        # in the final prompt string.
        "Given a company name or ticker by the user, read the company data from the text file 3_research_notes.txt. "
        "Summarize them into 2-3 paragraphs and be informative so it reads like a professional report. "
        "If there is any feedback, incorporate them to improve the report. If the ticker is not found, say so."
    ),
    tools=[read_company_data_from_txt],  # still the tool here: RHS evaluates before rebinding
)
@dataclass
class EvaluationFeedback:
    """Structured verdict produced by the evaluator agent."""

    # Actionable critique for the summarizer to incorporate on the next round.
    feedback: str
    # Overall judgement; main() exits the revision loop only on "pass".
    score: Literal["pass", "expect_improvement", "fail"]
# Judge agent: emits a structured EvaluationFeedback (not free text) so the
# loop in main() can branch directly on `score`.
evaluator = Agent[None](
    name="evaluator",
    instructions=(
        # Trailing spaces keep the implicitly-concatenated sentences separated
        # in the final prompt string.
        "You evaluate a stock overview summary and decide if it's good enough. "
        "If it's not good enough, you provide feedback on what needs to be improved. "
        "Never give it a pass on the first try, but be increasingly generous so its chance of passing increases over time."
    ),
    output_type=EvaluationFeedback,
)
async def main() -> None:
    """
    Drive the generate -> evaluate ("LLM as a judge") loop.

    The summarizer agent drafts a stock overview from the research notes; the
    evaluator judges it. On anything but a "pass" the evaluator's feedback is
    appended to the conversation and the summarizer runs again. A generous
    round cap keeps a never-satisfied evaluator from looping forever.
    """
    msg = input("🤖: What company are you interested in? \n👧: ")
    input_items: list[TResponseInputItem] = [{"content": msg, "role": "user"}]
    summary: str | None = None

    # Safety cap: the evaluator is instructed never to pass on round 1, so an
    # unbounded `while True` could spin (and spend tokens) indefinitely.
    max_rounds = 10

    # We'll run the entire workflow in a single trace
    with trace("LLM as a judge"):
        for _round in range(max_rounds):
            summarized_results = await Runner.run(
                read_company_data_from_txt,
                input_items,
            )
            # Carry the whole conversation (including the new draft) forward
            # so both the evaluator and the next revision see full history.
            input_items = summarized_results.to_input_list()
            summary = ItemHelpers.text_message_outputs(summarized_results.new_items)
            print("Stock overview summary generated")

            evaluator_result = await Runner.run(evaluator, input_items)
            result: EvaluationFeedback = evaluator_result.final_output
            print(f"Evaluator score: {result.score}")

            if result.score == "pass":
                print("The stock summary is 💡 good enough, exiting.")
                break

            print("Re-running with feedback")
            input_items.append({"content": f"Feedback: {result.feedback}", "role": "user"})
        else:
            # for/else: only reached when the cap expired without a "pass".
            print(f"Evaluator never passed within {max_rounds} rounds; keeping the last draft.")

    print(f"Final Summary: {summary}")
    print("Input items:", input_items)
# Script entry point: run the async generate/evaluate workflow to completion.
if __name__ == "__main__":
    asyncio.run(main())