building of pipeline (validation flaky)
This commit is contained in:
54
test_small_llms.py
Normal file
54
test_small_llms.py
Normal file
@@ -0,0 +1,54 @@
|
||||
from libs.test_class import Test
|
||||
from libs.run_tests import run_tests
|
||||
from libs.runnables import *
|
||||
from libs.validators import *
|
||||
|
||||
from pprint import pprint
|
||||
|
||||
def main():
    """Run the configured tests for every model/seed pair and print the outcomes.

    Builds the list of model names, the list of seeds and the list of
    ``Test`` cases, hands them to ``run_tests`` together with the server
    base URL, and then prints one colourised report per returned entry.

    Each entry returned by ``run_tests`` is read as a mapping with the keys
    ``'test'`` (an object exposing ``.name``), ``'model'``, ``'seed'``,
    ``'validation'`` and ``'result'``.
    """
    # Candidate models; only the uncommented entries are exercised.
    model_names = [
        # "llama3.1",  # 8b
        # "llama3.1:70b",
        # "llama3-groq-tool-use",  # latest
        # "llama3-groq-tool-use:70b",
        # "mixtral:8x7b",
        # "mixtral:8x22b",
        # "gemma2:2b",
        # "phi3",  # 3.8b
        "tinyllama:1.1b",
    ]

    # Seeds to run each test with; commented values are currently disabled.
    seed_values = [
        # 2,
        222,
        # 22222,
        2222222,
    ]

    # NOTE(review): the prompt text below contains typos ("freind",
    # "discalimers") and the test is named "Chinese Fruit" although the
    # question asks about vegetables. They are kept verbatim here because
    # the strings are runtime input to the tests -- confirm before changing.
    chinese_question_test = Test(
        name="Chinese Fruit",
        system_msg="You are a helpful assistant. You serve people across the globe. You can be a freind, but stay professional.",
        human_msg="什么蔬菜最健康?",
        validation_info="""- in Mandarin Chinese
- factually correct
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes)""",
        runnable=basic,
        validator=system_human_answer_match,
    )

    test_cases = [
        chinese_question_test,
        # TODO: add a "Simple Multiplication" test (stub was left unfinished).
    ]

    outcomes = run_tests(
        models=model_names,
        seeds=seed_values,
        tests=test_cases,
        base_url="http://bolt.hs-mittweida.de:11434",
    )

    # ANSI SGR sequences: cyan for the field labels, then reset.
    cyan = "\033[0;36m"
    reset = "\033[0m"

    print()
    for entry in outcomes:
        report = (
            f"\n{cyan}test_name:{reset} {entry['test'].name}"
            f"\n{cyan}model:{reset} {entry['model']}"
            f"\n{cyan}seed:{reset} {entry['seed']}"
            f"\n{cyan}validation_result:{reset} {entry['validation']}"
            f"\n{cyan}answer: ⏎{reset}\n{entry['result']}"
        )
        print(report)
|
||||
|
||||
if __name__ == "__main__":
    # Only run the test suite when executed as a script, not when imported.
    main()
|
||||
Reference in New Issue
Block a user