55 lines
1.6 KiB
Python
55 lines
1.6 KiB
Python
from libs.test_class import Test
|
|
from libs.run_tests import run_tests
|
|
from libs.runnables import *
|
|
from libs.validators import *
|
|
|
|
from pprint import pprint
|
|
|
|
def main():
    """Run the configured prompt tests and print every result to stdout.

    Builds the lists of model names, seeds and ``Test`` cases, hands them to
    ``run_tests`` together with the server base URL, and then prints one
    colourised report per returned result (test name, model, seed,
    validation outcome and the raw answer).
    """
    # Model tags to evaluate; uncomment an entry to include it in the run.
    model_names = [
        # "llama3.1",  # 8b
        # "llama3.1:70b",
        # "llama3-groq-tool-use",  # latest
        # "llama3-groq-tool-use:70b",
        # "mixtral:8x7b",
        # "mixtral:8x22b",
        # "gemma2:2b",
        # "phi3",  # 3.8b
        "tinyllama:1.1b",
    ]

    # Seeds passed to run_tests; other candidates kept for reference: 2, 22222.
    seed_values = [
        222,
        2222222,
    ]

    # Prompt strings are passed through verbatim (including their spelling),
    # since they are part of the input the models are evaluated on.
    # NOTE(review): the test is named "Chinese Fruit" but the human message
    # asks about vegetables - confirm which one is intended.
    chinese_fruit = Test(
        name="Chinese Fruit",
        system_msg=(
            "You are a helpful assistant. "
            "You serve people across the globe. "
            "You can be a freind, but stay professional."
        ),
        human_msg="什么蔬菜最健康?",
        validation_info=(
            "- in Mandarin Chinese\n"
            "- factually correct\n"
            '- just one single conversational answer, without any AI fragments'
            ' (A/B versions, "end of message" parts, unfitting discalimers or notes)'
        ),
        runnable=basic,
        validator=system_human_answer_match,
    )

    # TODO: add a "Simple Multiplication" test (was only stubbed out so far).
    test_cases = [chinese_fruit]

    outcomes = run_tests(
        models=model_names,
        seeds=seed_values,
        tests=test_cases,
        base_url="http://bolt.hs-mittweida.de:11434",
    )

    # ANSI escape sequences: cyan for the field labels, then reset.
    cyan, reset = "\033[0;36m", "\033[0m"

    print()
    for outcome in outcomes:
        report = (
            f"\n{cyan}test_name:{reset} {outcome['test'].name}"
            f"\n{cyan}model:{reset} {outcome['model']}"
            f"\n{cyan}seed:{reset} {outcome['seed']}"
            f"\n{cyan}validation_result:{reset} {outcome['validation']}"
            f"\n{cyan}answer: ⏎{reset}"
            f"\n{outcome['result']}"
        )
        print(report)
|
|
|
|
# Only run the test suite when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|