building of pipeline (validation flaky)

2024-08-04 20:50:11 +02:00
parent e56fa9225c
commit 52a180b936
8 changed files with 168 additions and 53 deletions
--- a/libs/validators.py
+++ b/libs/validators.py
@@ -0,0 +1,53 @@
+from langchain_ollama.chat_models import ChatOllama
+from langchain_core.prompts import HumanMessagePromptTemplate, ChatPromptTemplate, SystemMessagePromptTemplate
+from langchain.tools import Tool
+from libs.test_class import Test
+
+def system_human_answer_match(
+    test: Test,
+    answer: str,
+    base_url: str,
+    max_retries: int = 10,
+) -> bool:
+    """Ask a judge LLM whether ``answer`` satisfies the test's validation criteria.
+
+    The judge model is given a single ``rate`` tool and is instructed to call
+    it instead of answering conversationally. The boolean passed to that tool
+    is the verdict returned by this function.
+
+    Args:
+        test: Test case; its ``validation_info``, ``system_msg``, ``human_msg``
+            and ``name`` attributes are read.
+        answer: The model answer that should be rated.
+        base_url: Base URL of the Ollama server hosting the judge model.
+        max_retries: How many additional attempts are made when the judge does
+            not produce a usable ``rate`` tool call.
+
+    Returns:
+        True if the judge rated the answer as correct, False otherwise.
+
+    Raises:
+        RuntimeError: If no usable rating was obtained after all attempts.
+    """
+
+    # Only the signature and docstring matter: the function is handed to
+    # ``bind_tools`` as the tool description and is never executed locally.
+    def rate(rating: bool) -> None:
+        """Rate answer as correct (True) or as incorrect (False)."""
+
+    # NOTE: a longer, stricter "rating machine" system prompt and the
+    # llama3-groq-tool-use:70b model were tried here before.
+    prompt = ChatPromptTemplate.from_messages([
+        SystemMessagePromptTemplate.from_template(template="""Rate the answer as correct, if the answer is  
+                                                  {validation_info}
+                                                  
+                                                  else as incorrect. Only use the rate tool. Do not answer conversationally."""),
+        HumanMessagePromptTemplate.from_template(template="""System Message:
+{system_msg}
+
+Query:
+{human_msg}
+
+Answer:
+{answer}
+""")
+    ]).invoke({
+        "validation_info": test.validation_info,
+        "system_msg": test.system_msg,
+        "human_msg": test.human_msg,
+        "answer": answer
+    })
+
+    llm = ChatOllama(
+        model="llama3.1:70b",
+        base_url=base_url
+    ).bind_tools([rate])
+
+    # The prompt and the model are identical for every attempt, so they are
+    # built once above and only the invocation is repeated.
+    attempts = max(1, max_retries + 1)
+    for attempt in range(attempts):
+        ai_msg = llm.invoke(prompt)
+
+        try:
+            rating = ai_msg.tool_calls[0]['args']['rating']
+        except (IndexError, KeyError):
+            # No tool call at all, or a tool call without the expected argument.
+            rating = None
+
+        if isinstance(rating, bool):
+            return rating
+        # Some models emit the boolean as text; a non-empty string such as
+        # "false" would otherwise be truthy and flip the verdict.
+        if isinstance(rating, str) and rating.strip().lower() in ("true", "false"):
+            return rating.strip().lower() == "true"
+
+        if attempt + 1 < attempts:
+            print(f"\033[0;31mValidation Error \033[0mof {test.name} <{str(ai_msg.content)[:20]}...> Retrying...")
+
+    raise RuntimeError(
+        f"Validation of {test.name} failed: no usable rating after {attempts} attempts"
+    )
+