"""Run every model/test/seed combination and cache validated results on disk."""
import json
from typing import Union

from libs.classes import Test, Model
from libs.functions import nxhash

# ANSI escape sequences used to colour the progress output.
_RESET = "\033[0m"
_BLACK = "\033[0;30m"
_RED = "\033[0;31m"
_GREEN = "\033[0;32m"
_BLUE = "\033[0;34m"
_MAGENTA = "\033[0;35m"
_CYAN = "\033[0;36m"

_RESULTS_PATH = "./saved_results.json"


def get_len(collection: Union[list, dict]) -> int:
    """Return the length of the longest display string in *collection*.

    Args:
        collection: Either a list of seeds (measured via ``str(seed)``), a
            dict whose values are ``Model`` objects (measured via
            ``display_name``) or a dict whose values are ``Test`` objects
            (measured via ``name``). The kind of a dict is decided from its
            first value only.

    Returns:
        The maximum length found, or ``0`` for an empty list or empty dict.

    Raises:
        TypeError: If *collection* is neither a list nor a dict, or if the
            first dict value is neither a ``Model`` nor a ``Test``.
    """
    if isinstance(collection, list):
        return max((len(str(seed)) for seed in collection), default=0)
    if not isinstance(collection, dict):
        raise TypeError("get_len: unsupported collection_type")
    if not collection:
        # Nothing to measure; mirrors the empty-list result instead of
        # failing while peeking at a first value that does not exist.
        return 0

    first_value = next(iter(collection.values()))
    if isinstance(first_value, Model):
        return max(len(model.display_name) for model in collection.values())
    if isinstance(first_value, Test):
        return max(len(test.name) for test in collection.values())
    raise TypeError("get_len: unsupported collection_type")


def _format_status(
    color: str,
    lead: str,
    verb: str,
    model_name: str,
    seed: int,
    test_name: str,
    hash_key: str,
    widths: tuple[int, int, int],
    tail: str = _RESET,
) -> str:
    """Build one coloured, column-aligned status line for a combination.

    ``widths`` holds the (model, seed, test) column widths. Names are padded
    on the right with spaces; the seed is padded on the left with zeros that
    are printed in black. ``tail`` is appended directly after the closing
    parenthesis that follows the hash key.
    """
    model_width, seed_width, test_width = widths
    seed_text = str(seed)
    return "".join((
        color, lead, _RESET, model_name,
        color, "'", " " * (model_width - len(model_name)), " with seed ", _RESET,
        _BLACK, "0" * (seed_width - len(seed_text)), _RESET, seed_text,
        color, verb, _RESET, test_name,
        color, "'", " " * (test_width - len(test_name)), " (", _RESET, hash_key,
        color, ")", tail,
    ))


def run_tests(models: dict[int, Model], seeds: list[int], tests: dict[int, Test], base_url: str) -> dict:
    """Run all missing model/test/seed combinations, validate and persist them.

    Previously stored results are loaded from ``./saved_results.json``. Each
    combination is identified by ``str(nxhash(...))`` over the JSON encoding
    of its test id, model id and seed; combinations whose key is already
    stored are skipped. New combinations are executed through
    ``test.runnable`` and afterwards checked through ``test.validator``.
    Errors raised while running or validating one combination are printed
    and that combination is left out; the remaining ones still proceed.

    Args:
        models: Models to run, keyed by model id.
        seeds: Seeds to run every model/test pair with.
        tests: Tests to run, keyed by test id.
        base_url: Passed through unchanged to ``runnable`` and ``validator``.

    Returns:
        The stored results, including the entries validated in this call,
        keyed by combination hash.

    Raises:
        TypeError: Propagated from ``get_len`` when a collection holds
            unsupported values.
    """
    try:
        print("Trying to load saved_results.json")
        # UTF-8 is used explicitly because the file is written with
        # ensure_ascii=False, so it may contain non-ASCII characters that a
        # platform-default codec cannot represent.
        with open(_RESULTS_PATH, "r", encoding="utf-8") as f:
            saved_results = json.load(fp=f)
        print("Loaded.")
    except FileNotFoundError:
        print("saved_results.json not found. Initializing empty.")
        saved_results = {}

    # Column widths do not change while running, so compute them once
    # instead of rescanning every collection for each printed line.
    widths = (get_len(models), get_len(seeds), get_len(tests))

    # Get Results
    run_results = {}
    print("Starting to run Tests ... \n")
    for model_id, model in models.items():
        for test_id, test in tests.items():
            for seed in seeds:
                # Only the ids and the seed take part in the hash; the
                # human-readable names are attached afterwards.
                combination = {
                    'test_id': test_id,
                    'model_id': model_id,
                    'seed': seed,
                }
                hash_key = str(nxhash(json.dumps(combination, sort_keys=True)))
                combination['test_name'] = test.name
                combination['model_name'] = model.display_name

                if hash_key in saved_results:
                    print("\r" + _format_status(
                        _BLUE, "Model '", " skipped test '",
                        model.display_name, seed, test.name, hash_key, widths,
                        tail=" becasue its results exists in saved_results.json" + _RESET,
                    ))
                    continue

                try:
                    # No trailing newline: the line is overwritten via "\r"
                    # once the run has finished or failed.
                    print(_format_status(
                        _MAGENTA, "Model '", " now runs test '",
                        model.display_name, seed, test.name, hash_key, widths,
                    ), end="")
                    answer = test.runnable(model=model, seed=seed, test=test, base_url=base_url)
                    if isinstance(answer, str):
                        # Plain-text answer: no 'tool_calls' key is recorded.
                        combination['answer'] = answer
                    elif isinstance(answer, dict):
                        combination['answer'] = answer['answer']
                        combination['tool_calls'] = answer['tool_calls']
                    else:
                        raise TypeError(f"runnable returned unkown type {type(answer)}.")
                    # Keep the Test object around for the validation phase.
                    combination['test'] = test
                    run_results[hash_key] = combination
                    print("\r" + _format_status(
                        _GREEN, "Model '", " finished test '",
                        model.display_name, seed, test.name, hash_key, widths,
                    ))
                except Exception as e:
                    # Best effort: report the failure and move on to the
                    # next combination.
                    print(f"\r{_RED}Error: <{_RESET}{e}{_RED}> at ({_RESET}{hash_key}{_RED}). \nContinuing...{_RESET} ")

    # Validate Results
    if run_results:
        print("\nStarting validation of tests ...")
        for hash_key, result in run_results.items():
            try:
                entry = {
                    'test_name': result['test_name'],
                    'test_id': result['test_id'],
                    'model_name': result['model_name'],
                    'model_id': result['model_id'],
                    'seed': result['seed'],
                    'answer': result['answer'],
                    'validation': result['test'].validator(test=result['test'], answer=result['answer'], base_url=base_url),
                }
            except Exception as e:
                print(f"{_RED}Error validating entry ({_RESET}{hash_key}{_RED}). <{_RESET}{e}{_RED}> Continuing...{_RESET} ")
                continue
            if 'tool_calls' in result:
                entry['tool_calls'] = result['tool_calls']
            saved_results[hash_key] = entry  # add result with validation to saved results
            verdict = (_GREEN + "correct" + _RESET) if entry['validation'] else (_RED + "incorrect" + _RESET)
            print(_format_status(
                _CYAN, "Test results of model '", " on test '",
                entry['model_name'], entry['seed'], entry['test_name'], hash_key, widths,
                tail=" evaluated to " + _RESET + verdict,
            ))

        # Persist once, after all new entries have been validated.
        with open(_RESULTS_PATH, "w", encoding="utf-8") as f:
            json.dump(fp=f, obj=saved_results, indent=4, sort_keys=True, ensure_ascii=False)

    return saved_results