from libs.test_class import Test
from libs.validators import system_human_answer_match
from libs.runnables import basic


def padd(list, element):
    """Return ``str(element)`` left-justified to the widest entry of *list*.

    The width is the length of the longest ``str()`` representation among the
    items of *list*; an empty *list* yields a width of 0, i.e. no padding.

    NOTE: the first parameter shadows the builtin ``list``. The name is kept
    so that existing keyword callers continue to work.
    """
    longest = max((len(str(item)) for item in list), default=0)
    return str(element).ljust(longest)


def run_tests(models: list[str], seeds: list[int], tests: list[Test], base_url: str):
    """Run every test for every model/seed combination, then validate the answers.

    Phase 1 calls ``test.runnable(model=..., seed=..., test=..., base_url=...)``
    for each combination and records the outcome. A run that raises is
    reported on stdout and omitted from the results; the remaining runs
    continue.

    Phase 2 calls each recorded test's own ``validator`` on its answer and
    stores the outcome under the ``"validation"`` key.

    Args:
        models: Model identifiers to exercise.
        seeds: Seeds to run each model with.
        tests: Tests to execute; each must provide ``runnable``, ``validator``
            and ``name``.
        base_url: Passed through unchanged to runnables and validators.

    Returns:
        A list of dicts with the keys ``"test"``, ``"model"``, ``"seed"``,
        ``"result"`` and ``"validation"``, one per successful run.
    """
    results = []

    for model in models:
        for seed in seeds:
            for test in tests:
                try:
                    answer = test.runnable(model=model, seed=seed, test=test, base_url=base_url)
                    results.append({"test": test, "model": model, "seed": seed, "result": answer})
                    print(
                        f"Model {padd(models, model)} starting with seed {padd(seeds, seed)} "
                        f"is done with test '{test.name}'."
                    )
                except Exception as e:
                    # str() is required: concatenating the exception object
                    # itself raises TypeError and would abort the whole run.
                    print("\033[0;31mError:\033[0m" + str(e))

    for entry in results:
        # Use the validator of the test that produced this entry, not the
        # leftover loop variable from the run phase above.
        entry['validation'] = entry['test'].validator(
            test=entry['test'], answer=entry['result'], base_url=base_url
        )
        # `== True` is kept deliberately so that truthy non-boolean validator
        # results are still reported the same way as before.
        verdict = (
            '\033[0;32mcorrect\033[0m'
            if entry['validation'] == True  # noqa: E712
            else '\033[0;31mincorrect\033[0m'
        )
        print(
            f"Validation of answer from test {entry['test'].name} by {entry['model']} "
            f"with seed {entry['seed']} evaluated to " + verdict
        )

    return results