76 lines
2.2 KiB
Python
76 lines
2.2 KiB
Python
from libs.test_class import Test
|
|
from libs.run_tests import run_tests
|
|
from libs.runnables import *
|
|
from libs.validators import *
|
|
from libs.tools import *
|
|
|
|
from pprint import pprint
|
|
|
|
def main(base_url: str = "http://bolt.hs-mittweida.de:11434"):
    """Assemble the benchmark configuration and hand it to ``run_tests``.

    Builds three lists -- model tags, seeds and ``Test`` cases -- and passes
    them, together with the server URL, to ``run_tests``.

    Args:
        base_url: URL of the model server forwarded to ``run_tests``.
            Defaults to the host this script was originally written against
            (presumably an Ollama instance, judging by the port and the model
            tags -- confirm before relying on it).

    Returns:
        Whatever ``run_tests`` returns. The value used to be bound to an
        unused local and dropped; it is returned so callers can inspect it.
    """
    # Model tags under test. Commented-out entries are deliberately kept as
    # ready-to-toggle options.
    models = [
        "llama3.1",  # 8b
        "llama3.1:70b",
        "llama3-groq-tool-use",  # latest
        "llama3-groq-tool-use:70b",
        # "mixtral:8x7b",
        "mixtral:8x22b",
        # "gemma2:2b",
        # "phi3",  # 3.8b
        # "tinyllama:1.1b",
        "mistral-nemo:12b",
        # "command-r-plus:104b",
    ]

    seeds = [
        2,
        222,
        22222,
        2222222,
    ]

    # Criteria text handed to the validator of the first test.
    # NOTE(review): the text contains typos ("finnish", "discalimers",
    # "Manadarin Chineese", "in in"). It is runtime input, so it is kept
    # verbatim here; correct it deliberately if results need not stay
    # comparable with earlier runs.
    mandarin_criteria = (
        "- in Mandarin Chinese from front to finnish\n"
        "- factually correct\n"
        "- about healthy vegetables\n"
        "- just one single conversational answer, without any AI fragments "
        '(A/B versions, "end of message" parts, unfitting discalimers or notes)\n'
        "\n"
        "Again, the message has to be entirely in Manadarin Chineese.\n"
        "That means If the answer is not in Chinese the answer is NOT correct! "
        "Only if the message in in Chinese rate as correct"
    )

    tests = [
        # NOTE(review): the name says "Fruit" but the question and the
        # criteria are about vegetables. Left unchanged because the name may
        # be used to identify this test in the results.
        Test(
            name="Chinese Fruit",
            runnable=basic,
            runnable_input={
                "system_msg": "You are a helpful assistant. You serve people across the globe.",
                "human_msg": "什么蔬菜最健康?",
            },
            validator=system_human_answer_match,
            validation_input={
                "criteria": mandarin_criteria,
            },
        ),
        Test(
            name="Simple Multiplication",
            runnable=one_tool_call_answer,
            runnable_input={
                "system_msg": "You are a helpful assistant.",
                "human_msg": "What is 234215 times 143243?",
                "tools": {
                    "add": add,
                    "multiply": multiply,
                },
            },
            validator=regex_match_any,
            validation_input={
                # 234215 * 143243 == 33549659245, listed plain and with "," or
                # "." as thousands separator. A permissive per-digit separator
                # regex was tried and rejected because it would also accept
                # malformed groupings such as 3.354.965.9245.
                # NOTE(review): if these entries are evaluated as regular
                # expressions, the unescaped "." in the third one matches any
                # single character (e.g. a space) -- confirm this is intended.
                "patterns": ["33549659245", "33,549,659,245", "33.549.659.245"],
            },
        ),
    ]

    return run_tests(
        models=models,
        seeds=seeds,
        tests=tests,
        base_url=base_url,
    )
|
|
|
|
# Script entry point: start the benchmark only when this file is executed
# directly, so importing the module has no side effects.
if __name__ == "__main__":
    main()
|