30 lines
1.3 KiB
Python
30 lines
1.3 KiB
Python
from libs.test_class import Test
|
|
from libs.validators import system_human_answer_match
|
|
from libs.runnables import basic
|
|
|
|
def padd(list, element) -> str:
    """Left-justify ``element`` to the width of the widest entry in ``list``.

    Used to keep columns aligned when printing one value out of a known set.

    Args:
        list: Iterable of values; the longest ``str()`` form among them sets
            the column width. The name shadows the builtin but is kept so
            that existing keyword callers keep working.
        element: Value to render; it is converted with ``str()``.

    Returns:
        ``str(element)`` padded on the right with spaces up to the column
        width. The text is never truncated: if ``element`` is wider than
        every entry, or ``list`` is empty, it is returned unpadded.
    """
    # default=0 preserves the empty-input behaviour (no padding at all).
    width = max((len(str(item)) for item in list), default=0)
    return str(element).ljust(width)
|
|
|
|
def run_tests(models: list[str], seeds: list[int], tests: list[Test], base_url: str) -> list[dict]:
    """Run every test for every model/seed combination, then validate the answers.

    The work happens in two phases: first each test's ``runnable`` is called
    for each (model, seed) pair and its return value is collected; afterwards
    each collected answer is passed to the ``validator`` of the test that
    produced it. Progress and verdicts are printed to stdout.

    Args:
        models: Model identifiers; each is passed to the runnable as ``model``.
        seeds: Seeds; each is passed to the runnable as ``seed``.
        tests: Tests to execute. Each must expose ``name``, ``runnable`` and
            ``validator``.
        base_url: Forwarded unchanged to every runnable and validator call.

    Returns:
        One dict per successful run, in execution order, with the keys
        ``test``, ``model``, ``seed``, ``result`` and ``validation``. Runs
        whose runnable raised are reported on stdout and omitted.
    """
    results = []

    for model in models:
        for seed in seeds:
            for test in tests:
                try:
                    result = test.runnable(model=model, seed=seed, test=test, base_url=base_url)
                except Exception as e:
                    # Best-effort boundary: one failing run must not abort the
                    # remaining combinations. The exception is formatted via an
                    # f-string; concatenating it to a str would itself raise
                    # TypeError from inside this handler.
                    print(f"\033[0;31mError:\033[0m {e}")
                    continue

                results.append({"test": test, "model": model, "seed": seed, "result": result})
                print(f"Model {padd(models, model)} starting with seed {padd(seeds, seed)} is done with test '{test.name}'.")

    for result in results:
        # Validate with the validator of the test that produced this answer,
        # not with whatever test the run loop above happened to end on.
        result['validation'] = result['test'].validator(test=result['test'], answer=result['result'], base_url=base_url)

        # Explicit comparison kept on purpose: a truthy non-boolean validator
        # return is still reported as incorrect, exactly as before.
        verdict = '\033[0;32mcorrect\033[0m' if result['validation'] == True else '\033[0;31mincorrect\033[0m'
        print(f"Validation of answer from test {result['test'].name} by {result['model']} with seed {result['seed']} evaluated to " + verdict)

    return results
|