71 lines
2.4 KiB
Python
71 lines
2.4 KiB
Python
from libs.test_class import Test
|
|
from libs.validators import system_human_answer_match
|
|
from libs.runnables import basic
|
|
|
|
import json
|
|
|
|
def padd(list, element) -> str:
    """Return ``str(element)`` left-justified to the widest item of *list*.

    Every item of *list* is measured by the length of its ``str()`` form, so
    the items may be of any type. The result is never truncated: if *element*
    is wider than every item, or *list* is empty, ``str(element)`` is returned
    unchanged.

    Args:
        list: Iterable of values whose widest ``str()`` form sets the width.
        element: Value to convert to ``str`` and pad with trailing spaces.

    Returns:
        The padded string.
    """
    # NOTE: the first parameter shadows the builtin ``list``. The name is part
    # of the signature callers may use as a keyword, so it is kept as-is; just
    # do not try to call the builtin inside this function.
    longest = max((len(str(item)) for item in list), default=0)
    return str(element).ljust(longest)
|
|
|
|
|
|
|
|
_SAVED_RESULTS_PATH = "./saved_results.json"


def _combination_key(combination: dict) -> str:
    """Return a digest of *combination* that is identical in every process."""
    # Function-scope import so this block does not depend on edits to the
    # file's top-level import section.
    import hashlib

    # The builtin hash() of a str is salted per interpreter run, so it cannot
    # be used as a key that has to match entries persisted by an earlier run.
    canonical = json.dumps(combination, sort_keys=True)
    return hashlib.sha256(canonical.encode("utf-8")).hexdigest()


def run_tests(models: list[str], seeds: list[int], tests: list[Test], base_url: str):
    """Run every model/seed/test combination that has no saved result yet.

    Previously stored results are loaded from ``./saved_results.json`` (a
    missing file is treated as "nothing stored yet"). For each combination
    whose key is not in that store, ``test.runnable`` is called to obtain an
    answer; afterwards ``test.validator`` is called on each new answer and the
    validated entry is added to the store, which is written back to the same
    file.

    Args:
        models: Model identifiers passed to each test's ``runnable``.
        seeds: Seeds passed to each test's ``runnable``.
        tests: Objects exposing ``name``, ``runnable`` and ``validator``.
        base_url: Forwarded unchanged to ``runnable`` and ``validator``.

    Returns:
        The complete store: entries loaded from disk plus the entries created
        by this call, keyed by the combination digest. Each new entry holds
        ``test_name``, ``model``, ``seed``, ``answer`` and ``validation``.

    Raises:
        json.JSONDecodeError: If the results file exists but is not valid JSON.
    """
    try:
        with open(_SAVED_RESULTS_PATH, "r", encoding="utf-8") as f:
            saved_results = json.load(fp=f)
    except FileNotFoundError:
        # First run: there is no cache yet, so start with an empty store.
        saved_results = {}

    # Column widths for the progress line do not change while looping, so
    # compute them once instead of rescanning both lists per combination.
    model_width = max((len(str(m)) for m in models), default=0)
    seed_width = max((len(str(s)) for s in seeds), default=0)

    # Get Results
    run_results = {}
    for model in models:
        for seed in seeds:
            for test in tests:
                combination = {
                    'test_name': test.name,
                    'model': model,
                    'seed': seed,
                }
                hash_key = _combination_key(combination)

                if hash_key in saved_results:
                    print(f"Skipped {combination}")
                    continue

                combination['answer'] = test.runnable(model=model, seed=seed, test=test, base_url=base_url)
                # Keep the test object next to its answer for the validation pass.
                combination['test'] = test
                run_results[hash_key] = combination
                print(f"Model {str(model).ljust(model_width)} starting with seed {str(seed).ljust(seed_width)} is done with test '{test.name}'.")

    # Validate Results
    for hash_key, result in run_results.items():
        entry = {
            'test_name': result['test_name'],
            'model': result['model'],
            'seed': result['seed'],
            'answer': result['answer'],
            'validation': result['test'].validator(test=result['test'], answer=result['answer'], base_url=base_url),
        }

        # Add the result together with its validation to the saved results.
        saved_results[hash_key] = entry

        # Equality with True is kept on purpose: a truthy non-boolean verdict
        # (e.g. a non-empty string) must still be reported as incorrect.
        verdict = '\033[0;32mcorrect\033[0m' if entry['validation'] == True else '\033[0;31mincorrect\033[0m'  # noqa: E712
        print(f"Validation of answer from test {entry['test_name']} by {entry['model']} with seed {entry['seed']} evaluated to " + verdict)

    # ensure_ascii=False emits raw non-ASCII characters, so the encoding must
    # be explicit rather than whatever the platform default happens to be.
    with open(_SAVED_RESULTS_PATH, "w", encoding="utf-8") as f:
        json.dump(fp=f, obj=saved_results, indent=4, ensure_ascii=False)
    print("Dumped")

    return saved_results
|