54 lines
2.2 KiB
Python
54 lines
2.2 KiB
Python
from langchain_ollama.chat_models import ChatOllama
|
|
from langchain_core.prompts import HumanMessagePromptTemplate, ChatPromptTemplate, SystemMessagePromptTemplate
|
|
from langchain.tools import Tool
|
|
from libs.test_class import Test
|
|
|
|
def system_human_answer_match(
    test: Test,
    answer: str,
    base_url: str,
    *,
    max_retries: int = 5,
) -> bool:
    """Ask an Ollama-hosted LLM to judge whether ``answer`` passes ``test``.

    The model is shown the test's system message, the human query and the
    answer under review, together with ``test.validation_info`` as the
    acceptance criterion. It is expected to reply with a single call to the
    ``rate`` tool; the ``rating`` argument of that call is returned.

    Args:
        test: Test case providing ``validation_info``, ``system_msg``,
            ``human_msg`` and ``name``.
        answer: The answer text to be rated.
        base_url: Base URL of the Ollama server, passed to ``ChatOllama``.
        max_retries: How many additional attempts are made when the model
            does not produce a usable ``rate`` tool call. Values below zero
            are treated as zero.

    Returns:
        The ``rating`` argument of the model's first tool call.

    Raises:
        RuntimeError: If no attempt yields a tool call carrying a ``rating``
            argument.
    """

    def rate(rating: bool) -> None:
        """Rate answer as correct (True) or as incorrect (False)."""

    # Alternative, more insistent system prompt kept for reference:
    # SystemMessagePromptTemplate.from_template(template="""You are a rating machine. You are given 3 things: The system message, the Human query, and the AI response. You evaluate the response as correct if
    # {validation_info}
    #
    # If the answer does not match these criteria, rate the answer as incorrect. If the answer is a "refusal" or a "declaration of incapability", the answer is automatically incorrect.
    #
    # **Only use the rate tool. Do not under any circumstances answer conversationally**.
    # DO NOT ANSWER WITH <I'm sorry but I do not have the capability to perform this task for you...> or anything like it.
    # Use the rate tool!"""),
    system_template = (
        "Rate the answer as correct, if the answer is\n"
        "{validation_info}\n"
        "\n"
        "else as incorrect. Only use the rate tool. Do not answer conversationally."
    )
    human_template = (
        "System Message:\n"
        "{system_msg}\n"
        "\n"
        "Query:\n"
        "{human_msg}\n"
        "\n"
        "Answer:\n"
        "{answer}\n"
    )

    prompt = ChatPromptTemplate.from_messages([
        SystemMessagePromptTemplate.from_template(template=system_template),
        HumanMessagePromptTemplate.from_template(template=human_template),
    ]).invoke({
        "validation_info": test.validation_info,
        "system_msg": test.system_msg,
        "human_msg": test.human_msg,
        "answer": answer,
    })

    llm = ChatOllama(
        model="llama3.1:70b",
        # model="llama3-groq-tool-use:70b",
        base_url=base_url,
    ).bind_tools([rate])

    # The prompt and the bound model do not change between attempts, so only
    # the model invocation is repeated. (The original retried by recursing
    # without ``base_url``, which raised TypeError on the first retry.)
    attempts = max(0, max_retries) + 1
    for attempt in range(attempts):
        ai_msg = llm.invoke(prompt)
        try:
            return ai_msg.tool_calls[0]['args']['rating']
        except (IndexError, KeyError):
            # IndexError: the model made no tool call at all.
            # KeyError: the tool call lacks the ``rating`` argument.
            if attempt + 1 < attempts:
                print(f"\033[0;31mValidation Error \033[0mof {test.name} <{ai_msg.content[:20]}...> Retrying...")

    raise RuntimeError(
        f"Validation of {test.name} failed: the model did not return a usable "
        f"rate tool call after {attempts} attempt(s)."
    )
|
|
|