This commit is contained in:
Lennart J. Kurzweg (Nx2)
2024-08-26 21:20:47 +02:00
parent 2723ced901
commit 5d7ce3cf71
12 changed files with 2055 additions and 2350 deletions

View File

@@ -79,8 +79,8 @@ def run_tests(models: dict[int, Model], seeds: list[int], tests: dict[int, Test]
'technique_name': technique.name,
})
# if hash_key == "DE3D137E":
# pass
if hash_key == "0DEB2030":
pass
if hash_key not in saved_results.keys():
try:
@@ -105,7 +105,7 @@ def run_tests(models: dict[int, Model], seeds: list[int], tests: dict[int, Test]
"\033[0;35m)\033[0m",
end=""
)
answer = test.runnable(model=model, seed=seed, test=test, base_url=base_url)
answer = test.runnable(model=model, seed=seed, test=test, technique=technique, base_url=base_url)
if isinstance(answer, str):
combination['answer'] = answer
# combination['tool_calls'] = [] # no entry
@@ -172,13 +172,15 @@ def run_tests(models: dict[int, Model], seeds: list[int], tests: dict[int, Test]
try:
entry = {
'test_name': result['test_name'],
'test_id': result['test_id'],
'model_name': result['model_name'],
'model_id': result['model_id'],
'seed': result['seed'],
'answer': result['answer'],
'validation': result['test'].validator(test=result['test'], answer=result['answer'], base_url=base_url),
'test_name': result['test_name'],
'test_id': result['test_id'],
'model_name': result['model_name'],
'model_id': result['model_id'],
'technique_name': result['technique_name'],
'technique_id': result['technique_id'],
'seed': result['seed'],
'answer': result['answer'],
'validation': result['test'].validator(test=result['test'], answer=result['answer'], base_url=base_url),
}
except Exception as e:
print("\033[0;31mError validating entry (\033[0m" + hash_key + "\033[0;31m). <\033[0m" + str(e) + "\033[0;31m> Continuing...\033[0m ")