Files
test-small-llms/test_small_llms.py
Lennart J. Kurzweg (Nx2) 11f37009d3 extra file for printing
2024-08-08 16:52:44 +02:00

76 lines
2.2 KiB
Python

from libs.test_class import Test
from libs.run_tests import run_tests
from libs.runnables import *
from libs.validators import *
from libs.tools import *
from pprint import pprint
def main():
    """Define the benchmark matrix and hand it to ``run_tests``.

    Every test in the suite is passed together with the list of model
    identifiers and the list of seeds to ``run_tests``, which is pointed at
    the server given by ``base_url``.
    """
    # Model identifiers under test. Commented-out entries are currently
    # disabled but kept for quick re-activation.
    model_names = [
        "llama3.1",  # 8b
        "llama3.1:70b",
        "llama3-groq-tool-use",  # latest
        "llama3-groq-tool-use:70b",
        # "mixtral:8x7b",
        "mixtral:8x22b",
        # "gemma2:2b",
        # "phi3", # 3.8b
        # "tinyllama:1.1b",
        "mistral-nemo:12b",
        # "command-r-plus:104b",
    ]

    seed_values = [2, 222, 22222, 2222222]

    # Criteria text handed to the validator of the first test.
    # NOTE(review): the text contains spelling mistakes ("finnish",
    # "discalimers", "Manadarin Chineese", "in in"). It is runtime data, so it
    # is reproduced verbatim here; fix deliberately if desired.
    chinese_criteria = """- in Mandarin Chinese from front to finnish
- factually correct
- about healthy vegetables
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes)
Again, the message has to be entirely in Manadarin Chineese.
That means If the answer is not in Chinese the answer is NOT correct! Only if the message in in Chinese rate as correct"""

    # The human message asks (in Chinese) which vegetable is the healthiest.
    # NOTE(review): the test name says "Fruit" although the prompt and the
    # criteria are about vegetables - confirm whether the name is intended.
    chinese_test = Test(
        name="Chinese Fruit",
        runnable=basic,
        runnable_input={
            "system_msg": "You are a helpful assistant. You serve people across the globe.",
            "human_msg": "什么蔬菜最健康?",
        },
        validator=system_human_answer_match,
        validation_input={"criteria": chinese_criteria},
    )

    # Accepted spellings of the product 234215 * 143243: plain digits, comma
    # grouping and dot grouping. A looser pattern such as
    # "3[,\. ]?3[,\. ]?5[,\. ]?4[,\. ]?9[,\. ]?6[,\. ]?5[,\. ]?9[,\. ]?2[,\. ]?4[,\. ]?5"
    # is not used because it would also accept 3.354.965.9245.
    product_patterns = ["33549659245", "33,549,659,245", "33.549.659.245"]

    multiplication_test = Test(
        name="Simple Multiplication",
        runnable=one_tool_call_answer,
        runnable_input={
            "system_msg": "You are a helpful assistant.",
            "human_msg": "What is 234215 times 143243?",
            "tools": {
                "add": add,
                "multiply": multiply,
            },
        },
        validator=regex_match_any,
        validation_input={"patterns": product_patterns},
    )

    suite = [chinese_test, multiplication_test]

    # NOTE(review): the return value is bound but not used any further in
    # this function.
    results = run_tests(
        models=model_names,
        seeds=seed_values,
        tests=suite,
        base_url="http://bolt.hs-mittweida.de:11434",
    )
# Run the benchmark only when this file is executed as a script, so that
# importing the module does not trigger a test run.
if __name__ == "__main__":
    main()