import hashlib
import json

from libs.test_class import Test
from libs.validators import system_human_answer_match
from libs.runnables import basic

# On-disk cache of validated results, resolved relative to the working directory.
_RESULTS_PATH = "./saved_results.json"


def padd(list, element):
    """Return ``str(element)`` left-justified to the widest item of ``list``.

    Every item is measured via ``str()``, so mixed types (model names, integer
    seeds) are supported. An empty ``list`` yields ``str(element)`` unpadded.
    """
    # NOTE: the parameter name ``list`` shadows the builtin; it is kept so that
    # existing keyword callers keep working.
    longest = max((len(str(s)) for s in list), default=0)
    return str(element).ljust(longest)


def _combination_key(combination: dict) -> str:
    """Return a cache key for ``combination`` that is stable across processes."""
    # The builtin hash() of a str is salted per interpreter process, so it cannot
    # identify entries in a file that outlives the process; use a real digest.
    canonical = json.dumps(combination, sort_keys=True)
    return hashlib.sha256(canonical.encode("utf-8")).hexdigest()


def _load_saved_results() -> dict:
    """Load the saved results file, or return an empty dict if it does not exist."""
    try:
        with open(_RESULTS_PATH, "r", encoding="utf-8") as f:
            return json.load(fp=f)
    except FileNotFoundError:
        # First run: nothing has been cached yet. A malformed file is NOT
        # swallowed here, so corruption stays visible to the caller.
        return {}


def run_tests(models: list[str], seeds: list[int], tests: list[Test], base_url: str) -> dict:
    """Run every (model, seed, test) combination that is not cached, then validate.

    Each combination is keyed by a digest of its test name, model and seed.
    Combinations whose key is already present in ``./saved_results.json`` are
    skipped. For the remaining ones ``test.runnable`` is called to produce an
    answer, and afterwards ``test.validator`` is called on that answer. The
    merged results are written back to ``./saved_results.json``.

    Args:
        models: Model identifiers to run.
        seeds: Seeds to run each model with.
        tests: Tests to execute; each must provide ``name``, ``runnable`` and
            ``validator``.
        base_url: Passed through unchanged to ``runnable`` and ``validator``.

    Returns:
        The mapping of cache key to result entry (previously saved entries plus
        the ones produced by this call).

    Raises:
        json.JSONDecodeError: If the saved results file exists but is not valid JSON.
        Exception: Whatever ``test.runnable`` or ``test.validator`` raise; these
            are not caught here.
    """
    saved_results = _load_saved_results()

    # Get results
    run_results = {}
    for model in models:
        for seed in seeds:
            for test in tests:
                combination = {
                    'test_name': test.name,
                    'model': model,
                    'seed': seed,
                }
                hash_key = _combination_key(combination)

                if hash_key in saved_results:
                    print(f"Skipped {combination}")
                    continue

                combination['answer'] = test.runnable(model=model, seed=seed, test=test, base_url=base_url)
                # Keep the Test object next to its answer so the validation pass
                # below can reach the validator; it is never written to disk.
                combination['test'] = test
                run_results[hash_key] = combination
                print(f"Model {padd(models, model)} starting with seed {padd(seeds, seed)} is done with test '{test.name}'.")

    # Validate results
    for hash_key, result in run_results.items():
        entry = {
            'test_name': result['test_name'],
            'model': result['model'],
            'seed': result['seed'],
            'answer': result['answer'],
            'validation': result['test'].validator(test=result['test'], answer=result['answer'], base_url=base_url),
        }
        saved_results[hash_key] = entry  # add result with validation to saved results

        # Deliberately `== True`: only validator results equal to True count as
        # correct, not arbitrary truthy values.
        verdict = '\033[0;32mcorrect\033[0m' if entry['validation'] == True else '\033[0;31mincorrect\033[0m'  # noqa: E712
        print(f"Validation of answer from test {entry['test_name']} by {entry['model']} with seed {entry['seed']} evaluated to " + verdict)

    # ensure_ascii=False emits raw non-ASCII characters, so the encoding is
    # pinned to UTF-8 instead of depending on the platform default.
    with open(_RESULTS_PATH, "w", encoding="utf-8") as f:
        json.dump(fp=f, obj=saved_results, indent=4, ensure_ascii=False)
    print("Dumped")

    return saved_results