from libs.test_class import Test
from libs.run_tests import run_tests
from libs.runnables import *
from libs.validators import *
from libs.tools import *
from pprint import pprint


def main():
    """Run the configured test cases and print a colourised report.

    Hands the model names, seeds and ``Test`` definitions below to
    ``run_tests`` together with the server base URL, then prints one
    report section per returned result.
    """
    # Model tags handed to run_tests; disabled alternatives are kept
    # as comments so they can be re-enabled quickly.
    model_names = [
        "llama3.1",  # 8b
        # "llama3.1:70b",
        # "llama3-groq-tool-use",  # latest
        # "llama3-groq-tool-use:70b",
        # "mixtral:8x7b",
        # "mixtral:8x22b",
        # "gemma2:2b",
        # "phi3",  # 3.8b
        # "tinyllama:1.1b",
    ]

    # Seeds handed to run_tests (disabled values kept as comments).
    seed_values = [
        # 2,
        222,
        22222,
        # 2222222
    ]

    # The human message asks, in Chinese, which vegetable is the healthiest.
    chinese_test = Test(
        name="Chinese Fruit",
        runnable=basic,
        runnable_input={
            "system_msg": "You are a helpful assistant. You serve people across the globe.",
            "human_msg": "什么蔬菜最健康?",
        },
        validator=system_human_answer_match,
        validation_input={
            "criteria": """- in Mandarin Chinese - factually correct - just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes)""",
        },
    )

    # 234215 * 143243 == 33549659245; the patterns accept the product
    # written plain, with comma grouping, or with dot grouping.
    multiplication_test = Test(
        name="Simple Multiplication",
        runnable=one_tool_call_answer,
        runnable_input={
            "system_msg": "You are a helpful assistant.",
            "human_msg": "What is 234215 times 143243?",
            "tools": {
                "add": add,
                "multiply": multiply,
            },
        },
        validator=regex_match_any,
        validation_input={
            "patterns": ["33549659245", "33,549,659,245", "33.549.659.245"],
        },
    )

    outcomes = run_tests(
        models=model_names,
        seeds=seed_values,
        tests=[chinese_test, multiplication_test],
        base_url="http://bolt.hs-mittweida.de:11434",
    )

    # ANSI escape sequences used to highlight the field labels.
    cyan = "\033[0;36m"
    reset = "\033[0m"

    print()
    for outcome in outcomes:
        report = (
            f"\n{cyan}test_name:{reset} {outcome['test'].name}"
            f"\n{cyan}model:{reset} {outcome['model']}"
            f"\n{cyan}seed:{reset} {outcome['seed']}"
            f"\n{cyan}validation_result:{reset} {outcome['validation']}"
            f"\n{cyan}answer: ⏎{reset}"
            f"\n{outcome['result']}"
        )
        print(report)


if __name__ == "__main__":
    main()