mega commit

This commit is contained in:
Lennart J. Kurzweg (Nx2)
2024-08-20 20:47:17 +02:00
parent 4860179a1c
commit a578dd26a0
13 changed files with 608 additions and 305 deletions

View File

@@ -1,30 +1,19 @@
from libs.test_class import Test
from libs.classes import Test, Model
from libs.functions import nxhash
from typing import Union
import json
def padd(list, element):
    """Return ``str(element)`` left-justified to the widest entry of ``list``.

    Every item of ``list`` is measured by the length of its ``str()`` form;
    ``element`` is converted with ``str()`` and padded on the right with
    spaces up to that width. If ``list`` is empty, or ``element`` is already
    at least that wide, ``str(element)`` is returned unpadded.

    NOTE: the parameter name ``list`` shadows the builtin; it is kept so that
    existing keyword callers keep working.
    """
    # default=0 reproduces the "no items -> width 0" result of a manual loop.
    longest = max((len(str(item)) for item in list), default=0)
    return str(element).ljust(longest)
def nxhash(text: str) -> str:  # @BenVida StackOverflow
    """Return an 8-character uppercase hexadecimal rolling hash of ``text``.

    For each character the accumulator is multiplied by 281, XOR-ed with the
    character's code point times 997, and masked to 32 bits. The final value
    is rendered as zero-padded uppercase hex, so the empty string hashes to
    ``"00000000"``.
    """
    digest = 0  # named `digest` so the builtin `hash` is not shadowed
    for ch in text:
        # `*` binds tighter than `^`; the parentheses only make that explicit.
        digest = ((digest * 281) ^ (ord(ch) * 997)) & 0xFFFFFFFF
    # The mask keeps digest below 2**32, so it always fits in 8 hex digits.
    return f"{digest:08X}"
def get_len(collection: Union[list, dict]) -> int:
maximum_length = 0
if isinstance(collection, dict):
collection_type = "tests"
elif isinstance(collection, list):
if isinstance(collection[0], str):
collection_type = "models"
elif isinstance(collection[0], int):
collection_type = "seeds"
if isinstance(collection, list):
collection_type = "seeds"
elif isinstance(collection, dict):
if isinstance(collection[list(collection.keys())[0]], Model):
collection_type = "models"
elif isinstance(collection[list(collection.keys())[0]], Test):
collection_type = "tests"
else:
raise TypeError("get_len: unsupported collection_type")
else:
@@ -32,8 +21,8 @@ def get_len(collection: Union[list, dict]) -> int:
match collection_type:
case "models":
for model_name in collection:
maximum_length = max(maximum_length, len(model_name))
for model_id in collection:
maximum_length = max(maximum_length, len(collection[model_id].display_name))
case "seeds":
for seed in collection:
maximum_length = max(maximum_length, len(str(seed)))
@@ -48,40 +37,42 @@ def get_len(collection: Union[list, dict]) -> int:
def run_tests(models: list[str], seeds: list[int], tests: list[Test], base_url: str):
def run_tests(models: dict[int, Model], seeds: list[int], tests: dict[int, Test], base_url: str):
try:
print("Trying to load saved_results.json")
with open("./saved_results.json", "r") as f:
saved_results = json.load(fp=f)
print("Loaded.")
except:
except FileNotFoundError:
print("saved_results.json not found. Initializing empty.")
saved_results = {}
# Get Results
run_results = {}
print("Starting to run Tests ... ")
for model in models:
for model_id in models:
model = models[model_id]
for test_id in tests:
test = tests[test_id]
for seed in seeds:
# Init dict
combination = {
'test_id': test_id,
'model': model,
'model_id': model_id,
'seed': seed,
}
hash_key = str(nxhash(json.dumps(combination, sort_keys=True)))
combination['test_name'] = test.name
combination['model_name'] = model.display_name
# if hash_key == "DE3D137E":
# pass
if hash_key not in saved_results.keys():
try:
print("\033[0;35mModel '\033[0m" +
model +
model.display_name +
"\033[0;35m'" +
(" " * (get_len(models) - len(model))) +
(" " * (get_len(models) - len(model.display_name))) +
" with seed \033[0m\033[0;30m" +
("0" * (get_len(seeds) - len(str(seed)))) +
"\033[0m" +
@@ -96,7 +87,7 @@ def run_tests(models: list[str], seeds: list[int], tests: list[Test], base_url:
end=""
)
answer = test.runnable(model=model, seed=seed, test=test, base_url=base_url)
if isinstance(answer, str): # tool capabile return tools called as a list[dict]
if isinstance(answer, str):
combination['answer'] = answer
# combination['tool_calls'] = [] # no entry
del answer
@@ -105,15 +96,14 @@ def run_tests(models: list[str], seeds: list[int], tests: list[Test], base_url:
combination['tool_calls'] = answer['tool_calls']
del answer
else:
raise Exception(f"runnable returd unkown type {type(answer)}.")
raise Exception(f"runnable returned unkown type {type(answer)}.")
combination['test'] = test
run_results[hash_key] = combination
print("\r\033[0;32mModel '\033[0m" +
model +
model.display_name +
"\033[0;32m'" +
(" " * (get_len(models) - len(model))) +
(" " * (get_len(models) - len(model.display_name))) +
" with seed \033[0m\033[0;30m" +
("0" * (get_len(seeds) - len(str(seed)))) +
"\033[0m" +
@@ -127,12 +117,12 @@ def run_tests(models: list[str], seeds: list[int], tests: list[Test], base_url:
"\033[0;32m)\033[0m"
)
except Exception as e:
print("\r\033[0;31mError: <\033[0m" + str(e) + "\033[0;31m> at (\033[0m" + hash_key + "\033[0;31m). Continuing...")
print("\r\033[0;31mError: <\033[0m" + str(e) + "\033[0;31m> at (\033[0m" + hash_key + "\033[0;31m). Continuing...\033[0m ")
else:
print("\r\033[0;34mModel '\033[0m" +
model +
model.display_name +
"\033[0;34m'" +
(" " * (get_len(models) - len(model))) +
(" " * (get_len(models) - len(model.display_name))) +
" with seed \033[0m\033[0;30m" +
("0" * (get_len(seeds) - len(str(seed)))) +
"\033[0m" +
@@ -148,7 +138,8 @@ def run_tests(models: list[str], seeds: list[int], tests: list[Test], base_url:
# Validate Results
if run_results != {}: print("\nStarting validation of tests ...")
if run_results != {}:
print("\nStarting validation of tests ...")
for hash_key in run_results:
result = run_results[hash_key]
@@ -156,27 +147,28 @@ def run_tests(models: list[str], seeds: list[int], tests: list[Test], base_url:
entry = {
'test_name': result['test_name'],
'test_id': result['test_id'],
'model': result['model'],
'model_name': result['model_name'],
'model_id': result['model_id'],
'seed': result['seed'],
'answer': result['answer'],
'validation': result['test'].validator(test=result['test'], answer=result['answer'], base_url=base_url),
}
except Exception as e:
print("\033[0;31mError validating entry (\033[0m" + hash_key + "\033[0;31m). <\033[0m" + str(e) + "\033[0;31m> Continuing...\033[0m")
print("\033[0;31mError validating entry (\033[0m" + hash_key + "\033[0;31m). <\033[0m" + str(e) + "\033[0;31m> Continuing...\033[0m ")
continue
try:
entry['tool_calls'] = result['tool_calls']
except:
except KeyError:
pass
saved_results[hash_key] = entry # add result with validation to saved results
print("\033[0;36mTest results of model '\033[0m" +
entry['model'] +
entry['model_name'] +
"\033[0;36m'" +
(" " * (get_len(models) - len(entry['model']))) +
(" " * (get_len(models) - len(entry['model_name']))) +
" with seed \033[0m\033[0;30m" +
("0" * (get_len(seeds) - len(str(entry['seed'])))) +
"\033[0m" +
@@ -188,7 +180,7 @@ def run_tests(models: list[str], seeds: list[int], tests: list[Test], base_url:
" (\033[0m" +
hash_key +
"\033[0;36m) evaluated to \033[0m" +
('\033[0;32mcorrect\033[0m' if entry['validation'] == True else '\033[0;31mincorrect\033[0m')
('\033[0;32mcorrect\033[0m' if entry['validation'] else '\033[0;31mincorrect\033[0m')
)
with open("./saved_results.json", "w") as f: