building of pipeline (validation flaky)

This commit is contained in:
Lennart J. Kurzweg (Nx2)
2024-08-04 20:50:11 +02:00
parent e56fa9225c
commit 52a180b936
8 changed files with 168 additions and 53 deletions

54
test_small_llms.py Normal file
View File

@@ -0,0 +1,54 @@
from libs.test_class import Test
from libs.run_tests import run_tests
from libs.runnables import *
from libs.validators import *
from pprint import pprint
def main():
    """Run the configured prompt tests against the selected models and print the results.

    Assembles the model names, seeds and ``Test`` cases, passes them to
    ``run_tests`` together with the server base URL, and prints one
    colour-highlighted report per returned result.
    """
    # Only the uncommented entries are evaluated; the others are kept as
    # ready-to-enable alternatives.
    models = [
        # "llama3.1",  # 8b
        # "llama3.1:70b",
        # "llama3-groq-tool-use",  # latest
        # "llama3-groq-tool-use:70b",
        # "mixtral:8x7b",
        # "mixtral:8x22b",
        # "gemma2:2b",
        # "phi3",  # 3.8b
        "tinyllama:1.1b",
    ]

    seeds = [
        # 2,
        222,
        # 22222,
        2222222,
    ]

    # Criteria text handed to the validator, one bullet per line.
    # The wording (including its spelling) is passed on verbatim.
    expectations = (
        "- in Mandarin Chinese\n"
        "- factually correct\n"
        '- just one single conversational answer, without any AI fragments '
        '(A/B versions, "end of message" parts, unfitting discalimers or notes)'
    )

    # NOTE(review): the test is named "Chinese Fruit" but the prompt asks which
    # vegetable is the healthiest - confirm which of the two is intended.
    chinese_fruit = Test(
        name="Chinese Fruit",
        system_msg="You are a helpful assistant. You serve people across the globe. You can be a freind, but stay professional.",
        human_msg="什么蔬菜最健康?",
        validation_info=expectations,
        runnable=basic,
        validator=system_human_answer_match,
    )

    # TODO: add a "Simple Multiplication" test (only stubbed out so far).
    tests = [chinese_fruit]

    results = run_tests(
        models=models,
        seeds=seeds,
        tests=tests,
        base_url="http://bolt.hs-mittweida.de:11434",
    )

    # ANSI escape sequences: cyan for the field labels, then back to default.
    cyan = "\033[0;36m"
    reset = "\033[0m"

    print()
    for entry in results:
        # The leading empty element yields the blank line that separates reports.
        report_lines = [
            "",
            f"{cyan}test_name:{reset} {entry['test'].name}",
            f"{cyan}model:{reset} {entry['model']}",
            f"{cyan}seed:{reset} {entry['seed']}",
            f"{cyan}validation_result:{reset} {entry['validation']}",
            f"{cyan}answer: ⏎{reset}",
            f"{entry['result']}",
        ]
        print("\n".join(report_lines))
# Run the test suite only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()