# test-small-llms/libs/run_tests.py

from libs.test_class import Test
from libs.validators import system_human_answer_match
from libs.runnables import basic

import json

def padd(list, element):
    """Return ``str(element)`` left-justified to the widest entry of ``list``.

    The target width is the largest ``len(str(entry))`` over the entries of
    ``list`` (0 when ``list`` is empty). Text that is already at least that
    wide is returned unchanged; shorter text is padded on the right with
    spaces.
    """
    # NOTE: the parameter name shadows the builtin ``list`` inside this
    # function; it is kept because callers may pass it by keyword.
    width = max((len(str(entry)) for entry in list), default=0)
    text = str(element)
    return text.ljust(width)

def nxhash(text:str): # @BenVida StackOverflow
    """Return a deterministic 32-bit hash of ``text`` as 8 uppercase hex digits.

    Starting from 0, every character updates the accumulator to
    ``(acc * 281) ^ (ord(ch) * 997)`` truncated to 32 bits. The final value is
    rendered in uppercase hexadecimal, zero-padded to a width of 8.
    """
    acc = 0
    for char in text:
        mixed = (acc * 281) ^ (ord(char) * 997)
        acc = mixed & 0xFFFFFFFF  # keep only the low 32 bits
    return format(acc, "08X")

def get_len(l: list) -> int:
    """Return the length of the longest display text among the items of ``l``.

    The display text of an item is its ``name`` attribute for a ``Test``, the
    string itself for a ``str`` and ``str(item)`` for an ``int``.

    Args:
        l: Items to measure; each must be a ``Test``, ``str`` or ``int``.

    Returns:
        The maximum text length found, or 0 when ``l`` is empty.

    Raises:
        TypeError: If an item is not a ``Test``, ``str`` or ``int``.
    """
    longest = 0
    for item in l:
        if isinstance(item, Test):
            text = item.name
        elif isinstance(item, str):
            text = item
        elif isinstance(item, int):
            text = str(item)
        else:
            # TypeError is a subclass of Exception, so callers that catch
            # Exception keep working.
            raise TypeError(
                f"get_len() only supports lists of Test, str or int but got {type(item)}"
            )
        longest = max(longest, len(text))
    return longest


# ANSI escape sequences used to colour the console output.
_ANSI_RESET = "\033[0m"
_ANSI_RED = "\033[0;31m"
_ANSI_GREEN = "\033[0;32m"
_ANSI_BLUE = "\033[0;34m"
_ANSI_CYAN = "\033[0;36m"

# File in which results are cached between runs.
_RESULTS_PATH = "./saved_results.json"


def _load_saved_results(path):
    """Load the cached results from ``path``; return ``{}`` if that fails."""
    print("Trying to load saved_results.json")
    try:
        # Explicit UTF-8: the file is written with ensure_ascii=False.
        with open(path, "r", encoding="utf-8") as f:
            saved = json.load(fp=f)
    except FileNotFoundError:
        print("saved_results.json not found. Initializing empty.")
        return {}
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        # Stay best-effort like before, but do not claim the file is missing.
        print(f"saved_results.json could not be read ({e}). Initializing empty.")
        return {}
    print("Loaded.")
    return saved


def _status_line(color, lead, verb, tail, model, seed, test_name, hash_key, widths):
    """Build one coloured, column-aligned status line for a combination."""
    model_width, seed_width, test_width = widths
    seed_text = str(seed)
    return "".join((
        color, lead, _ANSI_RESET,
        model,
        color, "'", " " * (model_width - len(model)), " with seed ", _ANSI_RESET,
        seed_text, " " * (seed_width - len(seed_text)),
        color, verb, _ANSI_RESET,
        test_name,
        color, "'", " " * (test_width - len(test_name)), " (", _ANSI_RESET,
        hash_key,
        color, tail, _ANSI_RESET,
    ))


def run_tests(models: list[str], seeds: list[int], tests: list[Test], base_url: str):
    """Run every model/seed/test combination that is not cached and validate it.

    Each combination is identified by the ``nxhash`` of its JSON-serialised
    test name, model and seed. Combinations whose key is already present in
    ``saved_results.json`` are skipped. The others are executed through
    ``test.runnable``; a combination whose runnable raises is reported and
    left out. Every collected answer is then passed to ``test.validator`` and
    the resulting entry is added to the saved results, which are written back
    to ``saved_results.json``.

    Args:
        models: Model names to test.
        seeds: Seeds to run each model with.
        tests: Tests to execute.
        base_url: Forwarded unchanged to each runnable and validator.

    Returns:
        The saved results (previously stored entries plus the new ones),
        keyed by combination hash.
    """
    saved_results = _load_saved_results(_RESULTS_PATH)

    # Column widths do not change during the run, so compute them once.
    widths = (get_len(models), get_len(seeds), get_len(tests))

    # Get Results
    run_results = {}
    print("Starting to run Tests ... ")
    for model in models:
        for seed in seeds:
            for test in tests:
                combination = {
                    'test_name': test.name,
                    'model': model,
                    'seed': seed,
                }
                hash_key = str(nxhash(json.dumps(combination, sort_keys=True)))

                if hash_key in saved_results:
                    print(_status_line(
                        _ANSI_BLUE, "Model '", " skipped  test '",
                        ") because its result exists in saved_results.json",
                        model, seed, test.name, hash_key, widths,
                    ))
                    continue

                # Only the runnable is guarded: one failing combination must
                # not abort the remaining ones.
                try:
                    answer = test.runnable(model=model, seed=seed, test=test, base_url=base_url)
                except Exception as e:
                    print(_ANSI_RED + "Error: <" + _ANSI_RESET + " " + str(e) +
                          _ANSI_RED + ">" + _ANSI_RESET + " trying to continue...")
                    continue

                combination['answer'] = answer
                combination['test'] = test
                run_results[hash_key] = combination
                print(_status_line(
                    _ANSI_GREEN, "Model '", " finished test '", ")",
                    model, seed, test.name, hash_key, widths,
                ))

    # Validate Results
    if run_results:
        print("\nStarting validation of tests ...")
    for hash_key, result in run_results.items():
        validated_test = result['test']
        entry = {
            'test_name':  result['test_name'],
            'model':      result['model'],
            'seed':       result['seed'],
            'answer':     result['answer'],
            'validation': validated_test.validator(test=validated_test, answer=result['answer'], base_url=base_url),
        }

        saved_results[hash_key] = entry  # add result with validation to saved results

        # Equality with True is kept on purpose: any other validator return
        # value is reported as incorrect, exactly as before.
        if entry['validation'] == True:
            verdict = _ANSI_GREEN + "correct" + _ANSI_RESET
        else:
            verdict = _ANSI_RED + "incorrect" + _ANSI_RESET

        # Report the entry's own model/seed/test, not whatever the loop
        # variables of the run phase were left at.
        print(_status_line(
            _ANSI_CYAN, "Test results of model '", " on test '", ") evaluated to ",
            entry['model'], entry['seed'], entry['test_name'], hash_key, widths,
        ) + verdict)

    with open(_RESULTS_PATH, "w", encoding="utf-8") as f:
        json.dump(fp=f, obj=saved_results, indent=4, sort_keys=True, ensure_ascii=False)

    return saved_results