from langchain_ollama.chat_models import ChatOllama
from langchain_core.prompts import HumanMessagePromptTemplate, ChatPromptTemplate, SystemMessagePromptTemplate
from langchain.tools import Tool
from libs.test_class import Test


def system_human_answer_match(test: Test, answer: str, base_url: str) -> bool:
    """Ask an Ollama-hosted LLM to judge whether ``answer`` passes ``test``.

    A rating prompt is built from ``test.validation_info``,
    ``test.system_msg``, ``test.human_msg`` and ``answer`` and sent to the
    ``llama3.1:70b`` model with a single bound tool, ``rate``. The verdict is
    read from the ``rating`` argument of the first tool call in the reply.

    If the reply contains no usable tool call (no tool call at all, or one
    without a ``rating`` argument), a validation error is printed and the
    same prompt is sent again. There is no retry limit, so this function
    only returns once the model produces a tool call.

    Args:
        test: Test case providing ``validation_info``, ``system_msg``,
            ``human_msg`` and ``name``.
        answer: The answer to be rated.
        base_url: Base URL of the Ollama server hosting the rating model.

    Returns:
        The ``rating`` value the model passed to the ``rate`` tool
        (expected to be ``True`` for correct, ``False`` for incorrect).
    """

    # Tool schema only: the function is never executed locally. Its name,
    # signature and docstring are what get handed to the model.
    def rate(rating: bool) -> None:
        """Rate answer as correct (True) or as incorrect (False)."""

    prompt = ChatPromptTemplate.from_messages([
        SystemMessagePromptTemplate.from_template(template="""Rate the answer as correct, if the answer is {validation_info} else as incorrect. Only use the rate tool. Do not answer conversationally."""),
        # Alternative, more verbose system prompt (disabled):
        # SystemMessagePromptTemplate.from_template(template="""You are a rating machine. You are given 3 things: The system message, the Human query, and the AI response. You evaluate the response as correct if
        # {validation_info}
        # If the answer does not match these criteria, rate the answer as incorrect. If the answer is a "refusal" or a "declaration of incapability", the answer is automatically incorrect.
        # **Only use the rate tool. Do not under any circumstances answer conversationally**.
        # DO NOT ANSWER WITH or anything like it.
        # Use the rate tool!"""),
        HumanMessagePromptTemplate.from_template(template="""System Message: {system_msg} Query: {human_msg} Answer: {answer} """),
    ]).invoke({
        "validation_info": test.validation_info,
        "system_msg": test.system_msg,
        "human_msg": test.human_msg,
        "answer": answer,
    })

    llm = ChatOllama(
        model="llama3.1:70b",
        # model="llama3-groq-tool-use:70b",
        base_url=base_url,
    ).bind_tools([rate])

    # Retry in a loop rather than by recursion: the prompt and the bound
    # model do not change between attempts, and a loop neither has to
    # re-pass ``base_url`` nor grows the call stack on repeated failures.
    while True:
        ai_msg = llm.invoke(prompt)
        try:
            return ai_msg.tool_calls[0]['args']['rating']
        except (IndexError, KeyError):
            # IndexError: the model answered without calling the tool.
            # KeyError: the tool call lacks the ``rating`` argument.
            print(f"\033[0;31mValidation Error \033[0mof {test.name} <{ai_msg.content[:20]}...> Retrying...")