hash fixed + better run prints
This commit is contained in:
@@ -10,19 +10,39 @@ def padd(list, element):
|
|||||||
longest = max(longest, len(str(s)))
|
longest = max(longest, len(str(s)))
|
||||||
return str(element).ljust(longest)
|
return str(element).ljust(longest)
|
||||||
|
|
||||||
|
def nxhash(text: str) -> str:  # @BenVida StackOverflow
    """Return a 32-bit hash of ``text`` as eight uppercase hex digits.

    The result depends only on the characters of ``text``, so the same
    input always yields the same key. (The built-in ``hash()`` of a ``str``
    is salted per interpreter process unless PYTHONHASHSEED is fixed, which
    makes it unsuitable for keys that are stored and compared across runs.)

    Args:
        text: The string to hash.

    Returns:
        An 8-character, zero-padded, uppercase hexadecimal string;
        ``"00000000"`` for the empty string.
    """
    # Named ``digest`` rather than ``hash`` so the builtin is not shadowed.
    digest = 0
    for ch in text:
        # Multiply-and-xor step; the mask keeps the running value in 32 bits.
        digest = ((digest * 281) ^ (ord(ch) * 997)) & 0xFFFFFFFF
    # 08X: zero-padded to width 8, uppercase hex, no "0x" prefix.
    return f"{digest:08X}"
|
||||||
|
|
||||||
def run_tests(models: list[str], seeds: list[int], tests: list[Test], base_url: str):
|
def get_len(l: list) -> int:
|
||||||
|
m = 0
|
||||||
# try:
|
for e in l:
|
||||||
with open("./saved_results.json", "r") as f:
|
if isinstance(e, Test):
|
||||||
saved_results = json.load(fp=f)
|
m = max(m, len(e.name))
|
||||||
# except:
|
elif isinstance(e, str):
|
||||||
# saved_results = {}
|
m = max(m, len(e))
|
||||||
|
elif isinstance(e, int):
|
||||||
|
m = max(m, len(str(e)))
|
||||||
|
else:
|
||||||
|
raise Exception(f"get_len() only supports lits of Test, str or int but got {type(e)}")
|
||||||
|
return m
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def run_tests(models: list[str], seeds: list[int], tests: list[Test], base_url: str):
|
||||||
|
try:
|
||||||
|
print("Trying to load saved_results.json")
|
||||||
|
with open("./saved_results.json", "r") as f:
|
||||||
|
saved_results = json.load(fp=f)
|
||||||
|
print("Loaded.")
|
||||||
|
except:
|
||||||
|
print("saved_results.json not found. Initializing empty.")
|
||||||
|
saved_results = {}
|
||||||
# Get Results
|
# Get Results
|
||||||
run_results = {}
|
run_results = {}
|
||||||
|
print("Starting to run Tests ... ")
|
||||||
for model in models:
|
for model in models:
|
||||||
for seed in seeds:
|
for seed in seeds:
|
||||||
for test in tests:
|
for test in tests:
|
||||||
@@ -33,21 +53,50 @@ def run_tests(models: list[str], seeds: list[int], tests: list[Test], base_url:
|
|||||||
'model': model,
|
'model': model,
|
||||||
'seed': seed,
|
'seed': seed,
|
||||||
}
|
}
|
||||||
hash_key = str(hash(json.dumps(combination, sort_keys=True)))
|
hash_key = str(nxhash(json.dumps(combination, sort_keys=True)))
|
||||||
|
|
||||||
if hash_key not in saved_results.keys():
|
if hash_key not in saved_results.keys():
|
||||||
# try:
|
try:
|
||||||
combination['answer'] = test.runnable(model=model, seed=seed, test=test, base_url=base_url)
|
combination['answer'] = test.runnable(model=model, seed=seed, test=test, base_url=base_url)
|
||||||
combination['test'] = test
|
combination['test'] = test
|
||||||
run_results[hash_key] = combination
|
run_results[hash_key] = combination
|
||||||
print(f"Model {padd(models, model)} starting with seed {padd(seeds, seed)} is done with test '{test.name}'.")
|
print("\033[0;32mModel '\033[0m" +
|
||||||
# except Exception as e:
|
model +
|
||||||
# print("\033[0;31mError:\033[0m " + str(e))
|
"\033[0;32m'" +
|
||||||
|
(" " * (get_len(models) - len(model))) +
|
||||||
|
" with seed \033[0m" +
|
||||||
|
str(seed) +
|
||||||
|
(" " * (get_len(seeds) - len(str(seed)))) +
|
||||||
|
"\033[0;32m finished test '\033[0m" +
|
||||||
|
test.name +
|
||||||
|
"\033[0;32m'" +
|
||||||
|
(" " * (get_len(tests) - len(test.name))) +
|
||||||
|
" (\033[0m" +
|
||||||
|
hash_key +
|
||||||
|
"\033[0;32m)\033[0m"
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
print("\033[0;31mError: <\033[0m " + str(e) + "\033[0;31m>\033[0m trying to continue...")
|
||||||
else:
|
else:
|
||||||
print(f"Skipped {combination}")
|
print("\033[0;34mModel '\033[0m" +
|
||||||
|
model +
|
||||||
|
"\033[0;34m'" +
|
||||||
|
(" " * (get_len(models) - len(model))) +
|
||||||
|
" with seed \033[0m" +
|
||||||
|
str(seed) +
|
||||||
|
(" " * (get_len(seeds) - len(str(seed)))) +
|
||||||
|
"\033[0;34m skipped test '\033[0m" +
|
||||||
|
test.name +
|
||||||
|
"\033[0;34m'" +
|
||||||
|
(" " * (get_len(tests) - len(test.name))) +
|
||||||
|
" (\033[0m" +
|
||||||
|
hash_key +
|
||||||
|
"\033[0;34m) becasue its results exists in saved_results.json\033[0m"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# Validate Results
|
# Validate Results
|
||||||
|
if run_results != {}: print("\nStarting validation of tests ...")
|
||||||
for hash_key in run_results:
|
for hash_key in run_results:
|
||||||
result = run_results[hash_key]
|
result = run_results[hash_key]
|
||||||
|
|
||||||
@@ -61,10 +110,24 @@ def run_tests(models: list[str], seeds: list[int], tests: list[Test], base_url:
|
|||||||
|
|
||||||
saved_results[hash_key] = entry # add result with validation to saved results
|
saved_results[hash_key] = entry # add result with validation to saved results
|
||||||
|
|
||||||
print(f"Validation of answer from test {entry['test_name']} by {entry['model']} with seed {entry['seed']} evaluated to " + ('\033[0;32mcorrect\033[0m' if entry['validation'] == True else '\033[0;31mincorrect\033[0m'))
|
print("\033[0;36mTest results of model '\033[0m" +
|
||||||
|
model +
|
||||||
|
"\033[0;36m'" +
|
||||||
|
(" " * (get_len(models) - len(entry['model']))) +
|
||||||
|
" with seed \033[0m" +
|
||||||
|
str(seed) +
|
||||||
|
(" " * (get_len(seeds) - len(str(entry['seed'])))) +
|
||||||
|
"\033[0;36m on test '\033[0m" +
|
||||||
|
test.name +
|
||||||
|
"\033[0;36m'" +
|
||||||
|
(" " * (get_len(tests) - len(entry['test_name']))) +
|
||||||
|
" (\033[0m" +
|
||||||
|
hash_key +
|
||||||
|
"\033[0;36m) evaluated to \033[0m" +
|
||||||
|
('\033[0;32mcorrect\033[0m' if entry['validation'] == True else '\033[0;31mincorrect\033[0m')
|
||||||
|
)
|
||||||
|
|
||||||
with open("./saved_results.json", "w") as f:
|
with open("./saved_results.json", "w") as f:
|
||||||
json.dump(fp=f, obj=saved_results, indent=4, ensure_ascii=False)
|
json.dump(fp=f, obj=saved_results, indent=4, sort_keys=True, ensure_ascii=False)
|
||||||
print("Dumped")
|
|
||||||
|
|
||||||
return saved_results
|
return saved_results
|
||||||
|
|||||||
Reference in New Issue
Block a user