import json

from libs.test_class import Test
from libs.validators import system_human_answer_match
from libs.runnables import basic

# On-disk cache of already validated results, keyed by combination hash.
_SAVED_RESULTS_PATH = "./saved_results.json"

# ANSI escape sequences used for the colored progress output.
_RESET = "\033[0m"
_RED = "\033[0;31m"
_GREEN = "\033[0;32m"
_BLUE = "\033[0;34m"
_CYAN = "\033[0;36m"


def padd(list, element):  # noqa: A002 - parameter name is part of the public interface
    """Return ``str(element)`` left-justified to the widest item of *list*.

    The width is the length of the longest ``str()`` representation found in
    *list*; an empty *list* yields a width of 0, i.e. no padding.
    """
    longest = max((len(str(s)) for s in list), default=0)
    return str(element).ljust(longest)


def nxhash(text: str):
    """Return a deterministic 32-bit hash of *text* as 8 uppercase hex digits.

    Unlike the builtin ``hash()``, the result does not vary between
    interpreter runs, which is what makes it usable as a persistent key.
    """
    # @BenVida StackOverflow
    acc = 0
    for ch in text:
        # Multiply-and-xor step, truncated to 32 bits after every character.
        acc = (acc * 281 ^ ord(ch) * 997) & 0xFFFFFFFF
    return hex(acc)[2:].upper().zfill(8)


def get_len(l: list) -> int:
    """Return the display width of the widest element in *l*.

    ``Test`` elements are measured by their ``name``, strings by their length
    and integers by the length of their decimal representation. An empty list
    yields 0.

    Raises:
        TypeError: if an element is not a ``Test``, ``str`` or ``int``.
    """
    m = 0
    for e in l:
        if isinstance(e, Test):
            m = max(m, len(e.name))
        elif isinstance(e, str):
            m = max(m, len(e))
        elif isinstance(e, int):
            m = max(m, len(str(e)))
        else:
            # TypeError is still an Exception, so existing handlers keep working.
            raise TypeError(f"get_len() only supports lits of Test, str or int but got {type(e)}")
    return m


def _load_saved_results() -> dict:
    """Load the saved results file, falling back to an empty dict."""
    print("Trying to load saved_results.json")
    try:
        with open(_SAVED_RESULTS_PATH, "r", encoding="utf-8") as f:
            saved_results = json.load(fp=f)
    except FileNotFoundError:
        print("saved_results.json not found. Initializing empty.")
        return {}
    except (OSError, ValueError) as e:
        # ValueError covers both invalid JSON and undecodable bytes.
        print(f"saved_results.json could not be read ({e}). Initializing empty.")
        return {}
    print("Loaded.")
    return saved_results


def _status_line(color, lead, model, seed, verb, test_name, hash_key, tail, widths):
    """Build one colored, column-aligned progress line for a combination."""
    model_width, seed_width, test_width = widths
    seed_text = str(seed)
    # A negative repeat count yields "", so over-long values simply get no padding.
    model_pad = " " * (model_width - len(model))
    seed_pad = " " * (seed_width - len(seed_text))
    test_pad = " " * (test_width - len(test_name))
    return "".join((
        color, lead, " '", _RESET, model,
        color, "'", model_pad, " with seed ", _RESET, seed_text, seed_pad,
        color, " ", verb, " '", _RESET, test_name,
        color, "'", test_pad, " (", _RESET, hash_key,
        color, ")", tail, _RESET,
    ))


def run_tests(models: list[str], seeds: list[int], tests: list[Test], base_url: str) -> dict:
    """Run and validate every model/seed/test combination not yet saved.

    Each combination is identified by ``nxhash`` of its JSON form. Combinations
    whose key already exists in ``saved_results.json`` are skipped; the others
    are executed through ``test.runnable`` and then checked with
    ``test.validator``. Newly validated entries are merged into the saved
    results and written back to ``saved_results.json``.

    Args:
        models: Model names to run.
        seeds: Seeds to run each model with.
        tests: Tests to execute.
        base_url: Passed through unchanged to every runnable and validator.

    Returns:
        The saved results, including entries validated during this call.
    """
    saved_results = _load_saved_results()

    # Column widths do not change during the run, so compute them only once.
    widths = (get_len(models), get_len(seeds), get_len(tests))

    # Get Results
    run_results = {}
    print("Starting to run Tests ... \n")
    for model in models:
        for seed in seeds:
            for test in tests:
                # Init dict
                combination = {
                    'test_name': test.name,
                    'model': model,
                    'seed': seed,
                }
                hash_key = nxhash(json.dumps(combination, sort_keys=True))

                if hash_key in saved_results:
                    print(_status_line(
                        _BLUE, "Model", model, seed, "skipped test", test.name, hash_key,
                        " becasue its results exists in saved_results.json", widths,
                    ))
                    continue

                try:
                    answer = test.runnable(model=model, seed=seed, test=test, base_url=base_url)
                except Exception as e:
                    # Best effort: one failing combination must not abort the run.
                    print(f"{_RED}Error: <{_RESET} {e}{_RED}>{_RESET} trying to continue...")
                    continue

                combination['answer'] = answer
                combination['test'] = test
                run_results[hash_key] = combination
                print(_status_line(
                    _GREEN, "Model", model, seed, "finished test", test.name, hash_key,
                    "", widths,
                ))

    # Validate Results
    if run_results:
        print("\nStarting validation of tests ...")
        for hash_key, result in run_results.items():
            entry = {
                'test_name': result['test_name'],
                'model': result['model'],
                'seed': result['seed'],
                'answer': result['answer'],
                'validation': result['test'].validator(
                    test=result['test'], answer=result['answer'], base_url=base_url
                ),
            }
            saved_results[hash_key] = entry  # add result with validation to saved results

            # Kept as an equality check so non-bool validator results are
            # classified exactly as before.
            if entry['validation'] == True:  # noqa: E712
                verdict = f"{_GREEN}correct{_RESET}"
            else:
                verdict = f"{_RED}incorrect{_RESET}"

            # Report the entry's own values, not the leftover variables of the
            # run loops above, which always hold the last combination.
            print(_status_line(
                _CYAN, "Test results of model", entry['model'], entry['seed'], "on test",
                entry['test_name'], hash_key, " evaluated to ", widths,
            ) + verdict)

        # Written only when this call produced new entries.
        with open(_SAVED_RESULTS_PATH, "w", encoding="utf-8") as f:
            json.dump(fp=f, obj=saved_results, indent=4, sort_keys=True, ensure_ascii=False)

    return saved_results