mf1

2024-08-26 21:20:47 +02:00
parent 2723ced901
commit 5d7ce3cf71
12 changed files with 2055 additions and 2350 deletions
--- a/.direnv/bin/nix-direnv-reload
+++ b/.direnv/bin/nix-direnv-reload
@@ -1,19 +0,0 @@
-#!/usr/bin/env bash
-set -e
-if [[ ! -d "/home/nx2/test-small-llms" ]]; then
-  echo "Cannot find source directory; Did you move it?"
-  echo "(Looking for "/home/nx2/test-small-llms")"
-  echo 'Cannot force reload with this script - use "direnv reload" manually and then try again'
-  exit 1
-fi
-
-# rebuild the cache forcefully
-_nix_direnv_force_reload=1 direnv exec "/home/nx2/test-small-llms" true
-
-# Update the mtime for .envrc.
-# This will cause direnv to reload again - but without re-building.
-touch "/home/nx2/test-small-llms/.envrc"
-
-# Also update the timestamp of whatever profile_rc we have.
-# This makes sure that we know we are up to date.
-touch -r "/home/nx2/test-small-llms/.envrc" "/home/nx2/test-small-llms/.direnv"/*.rc
--- a/.direnv/nix-profile-24.05-19l2iplgd8kw8b1i
+++ b/.direnv/nix-profile-24.05-19l2iplgd8kw8b1i
@@ -1 +0,0 @@
-/nix/store/j2vf461mp9h2y9awkklbfawf3dz7cs1p-nix-shell-env
--- a/.direnv/nix-profile-24.05-19l2iplgd8kw8b1i.rc
+++ b/.direnv/nix-profile-24.05-19l2iplgd8kw8b1i.rc
--- a/libs/ollama_functions.py
+++ b/libs/ollama_functions.py
@@ -20,11 +20,10 @@ from langchain_core.language_models import LanguageModelInput
 from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage, BaseMessage, ToolCall
 from langchain_core.outputs import ChatGeneration, ChatResult 
 from langchain_core.prompts import SystemMessagePromptTemplate
-from langchain_core.pydantic_v1 import BaseModel
+from langchain_core.pydantic_v1 import BaseModel 
 from langchain_core.runnables import Runnable
 from langchain_core.tools import BaseTool, Tool
 from langchain_core.utils.pydantic import is_basemodel_instance, is_basemodel_subclass
-from textwrap import dedent

 from libs.functions import nxhash

@@ -98,14 +97,15 @@ def _is_pydantic_class(obj: Any) -> bool:
        is_basemodel_subclass(obj) or BaseModel in obj.__bases__
    )

-class OllamaFunctions(ChatOllama):
+class OllamaFunctionsBase(ChatOllama):
    """Function chat model that uses Ollama API."""

    tool_system_prompt_template: str = DEFAULT_SYTEM_PROMPT
    tool_system_prompt_template_with_history: str = DEFAULT_SYTEM_PROMPT_WITH_HISTORY
    max_tool_call_fails: int = 5

-    def __init__(self, max_tool_call_fails, **kwargs: Any) -> None:
+    def __init__(self,  **kwargs: Any) -> None:
+
        super().__init__(**kwargs)

    def bind_tools(
@@ -115,6 +115,8 @@ class OllamaFunctions(ChatOllama):
    ) -> Runnable[LanguageModelInput, BaseMessage]:
        return self.bind(functions=tools, **kwargs)

+    def _get_final_message(self, messages: list, functions_str: str) -> list:
+        """Build the final list of messages handed to the model.
+
+        Hook for subclasses: the base class provides no implementation.
+        ``_generate`` calls this with the conversation and the JSON-serialised
+        tool list (``functions_str``) right before generating the chat result.
+
+        Raises:
+            NotImplementedError: Always, unless a subclass overrides it.
+        """
+        raise NotImplementedError

    def _generate(self, messages: List[BaseMessage], stop: Optional[List[str]] = None, run_manager: Optional[CallbackManagerForLLMRun] = None, **kwargs: Any) -> ChatResult:
        def _convert_to_ollama_tool(self, tool: Any) -> Dict:
@@ -177,11 +179,11 @@ class OllamaFunctions(ChatOllama):
            return called_tool

        def _extract_conversaional_response(self, d: dict) -> str:
-            if ("tool_input" in d and "response" in d["tool_input"]):
+            if ("tool_input" in d and d["tool_input"] and "response" in d["tool_input"]):
                response = d["tool_input"]["response"]
-            elif ("input" in d and "response" in d["input"]):
+            elif ("input" in d and d["input"] and "response" in d["input"]):
                response = d["input"]["response"]
-            elif ("args" in d and "response" in d["args"]):
+            elif ("args" in d and d["args"] and "response" in d["args"]):
                response = d["args"]["response"]
            elif "response" in d:
                response = d["response"]
@@ -220,66 +222,6 @@ class OllamaFunctions(ChatOllama):
                called_tool_args = {}
            return called_tool_args

-        def _get_final_message(self, messages: list, functions_str: str) -> list:
-            def _get_system_msg_and_formatted_history(self, messages: list) -> Tuple[str, str]:
-                def _format_tools_for_history(tool_calls: list[ToolCall]) -> str:
-                    call_list = []
-                    for c in tool_calls:
-                        call_list.append({
-                          "id": nxhash(c['id'])[-4:],
-                          "tool": c['name'],
-                          "args": c['args']
-                        })
-                    if len(call_list) == 1:
-                        return json.dumps(obj=call_list[0], ensure_ascii=False, indent=2)
-                    else:
-                        return json.dumps(obj=call_list, ensure_ascii=False, indent=2) 
-                formated_history = ""
-                system_msg = messages[0]
-                for m in messages[1:]:
-
-                    if formated_history != "":
-                        formated_history += "\n\n"
-
-                    if isinstance(m, SystemMessage):
-                        formated_history += "The system provided the info:\n" + str(m.content)
-                    elif isinstance(m, HumanMessage):
-                        formated_history += "The Human said:\n" + str(m.content)
-                    elif isinstance(m, AIMessage) and m.tool_calls: 
-                        formated_history += "So you called the tool" + (":\n" if len(m.tool_calls) == 1 else "s:\n") + _format_tools_for_history(m.tool_calls)
-                    elif isinstance(m, ToolMessage): 
-                        formated_history += "To which the tool (" + nxhash(m.tool_call_id)[-4:] + ") replied with:\n" + str(m.content)
-                    elif isinstance(m, AIMessage) and not m.tool_calls: 
-                        formated_history += "You said:\n" + str(m.content)
-                    else:
-                        raise TypeError("OllamaFunctions only supports SystemMessage HumanMessage ToolMessage AIMessage but got " + str(type(m)))
-
-                return system_msg, formated_history
-
-            # prepare generation with history
-            if True in [ isinstance(m, ToolMessage) for m in messages ]:
-                system_msg, formated_history = _get_system_msg_and_formatted_history(self, messages=messages)
-                
-                system_message_prompt_template = SystemMessagePromptTemplate.from_template(self.tool_system_prompt_template_with_history)
-                system_message = system_message_prompt_template.format(
-                    tools=functions_str, 
-                    history=formated_history, 
-                    system_msg=system_msg
-                )
-                final_messages = [ system_message ]
-
-            # prepare generation without history
-            else:
-                system_message_prompt_template = SystemMessagePromptTemplate.from_template(self.tool_system_prompt_template)
-                system_message = system_message_prompt_template.format(
-                    tools=functions_str
-                )
-                final_messages = [ system_message ] + messages
-
-            return final_messages
-
-
-            

        def gen(self, failed_tool_calls: int, messages: list) -> ChatResult:

@@ -289,7 +231,7 @@ class OllamaFunctions(ChatOllama):
            functions_str = json.dumps(functions_list, indent=2)

            # get messages to prompt with
-            final_messages = _get_final_message(self, messages=messages, functions_str=functions_str)
+            final_messages = self._get_final_message(messages=messages, functions_str=functions_str)

            # genrerate chat result
            response_message = super()._generate(final_messages, stop=stop, run_manager=run_manager, **kwargs)
@@ -329,6 +271,125 @@ class OllamaFunctions(ChatOllama):
        return gen(self, failed_tool_calls=0, messages=messages)
    
    
+
+
+
+
+
+class OllamaFunctionsLSM(OllamaFunctionsBase):
+    """Function chat model that uses Ollama API."""
+       
+    def _get_final_message(self, messages: list, functions_str: str) -> list:
+        """Build the messages sent to the model (Long System Message technique).
+
+        If ``messages`` contains no ToolMessage, the tool system prompt is
+        prepended to the unchanged conversation.  Otherwise every message
+        after the first one is rendered into a plain-text transcript and
+        embedded, together with the first message and the tool list, into a
+        single system message.
+
+        Args:
+            messages: Conversation so far.  When a history is rendered, the
+                first entry is passed to the template as ``system_msg``.
+            functions_str: JSON description of the available tools.
+
+        Returns:
+            The list of messages to pass on to the underlying chat model.
+
+        Raises:
+            TypeError: If the rendered history contains a message that is not
+                a SystemMessage, HumanMessage, AIMessage or ToolMessage.
+        """
+        def _format_tool_calls(tool_calls: list[ToolCall]) -> str:
+            # Ids are shortened to the last four characters of their hash; the
+            # tool replies rendered below use the same short id.
+            calls = [
+                {"id": nxhash(c['id'])[-4:], "tool": c['name'], "args": c['args']}
+                for c in tool_calls
+            ]
+            # A single call is emitted as an object, several calls as a list.
+            payload = calls[0] if len(calls) == 1 else calls
+            return json.dumps(obj=payload, ensure_ascii=False, indent=2)
+
+        def _render(m) -> str:
+            """Render one history message as a transcript paragraph."""
+            if isinstance(m, SystemMessage):
+                return "The system provided the info:\n" + str(m.content)
+            if isinstance(m, HumanMessage):
+                return "The Human said:\n" + str(m.content)
+            if isinstance(m, ToolMessage):
+                return "To which the tool (" + nxhash(m.tool_call_id)[-4:] + ") replied with:\n" + str(m.content)
+            if isinstance(m, AIMessage):
+                if m.tool_calls:
+                    return "So you called the tool" + (":\n" if len(m.tool_calls) == 1 else "s:\n") + _format_tool_calls(m.tool_calls)
+                return "You said:\n" + str(m.content)
+            raise TypeError("OllamaFunctions only supports SystemMessage HumanMessage ToolMessage AIMessage but got " + str(type(m)))
+
+        # prepare generation without history
+        if not any(isinstance(m, ToolMessage) for m in messages):
+            prompt = SystemMessagePromptTemplate.from_template(self.tool_system_prompt_template)
+            return [prompt.format(tools=functions_str)] + messages
+
+        # prepare generation with history
+        formated_history = "\n\n".join(_render(m) for m in messages[1:])
+        prompt = SystemMessagePromptTemplate.from_template(self.tool_system_prompt_template_with_history)
+        # NOTE(review): the first message object itself (not its .content) is
+        # handed to the template, as before - confirm this is intended.
+        system_message = prompt.format(
+            tools=functions_str,
+            history=formated_history,
+            system_msg=messages[0],
+        )
+        return [system_message]
+
    @property
    def _llm_type(self) -> str:
-        return "ollama_functions"
+        return "ollama_functions_lsm"
+
+
+    
+class OllamaFunctionsT2S(OllamaFunctionsBase):
+    """Ollama function chat model using the "ToolMessages to SystemMessages" technique.
+
+    Tool replies are handed back to the model as SystemMessages and earlier
+    tool calls as JSON-encoded AIMessages, so the conversation keeps its
+    message structure instead of being folded into one long system prompt.
+    """
+
+    def _get_final_message(self, messages: list, functions_str: str) -> list:
+        """Build the messages sent to the model.
+
+        Args:
+            messages: Conversation so far.
+            functions_str: JSON description of the available tools.
+
+        Returns:
+            The tool system prompt followed by the conversation.  If the
+            conversation contains at least one ToolMessage, ToolMessages are
+            replaced by SystemMessages and AIMessages carrying tool calls by
+            AIMessages whose content is the JSON of those calls; every other
+            message is passed through unchanged.
+        """
+        system_message = SystemMessagePromptTemplate.from_template(
+            self.tool_system_prompt_template
+        ).format(tools=functions_str)
+
+        # prepare generation without history: nothing needs rewriting
+        if not any(isinstance(m, ToolMessage) for m in messages):
+            return [ system_message ] + messages
+
+        # prepare generation with history
+        transformed_messages = []
+        for m in messages:
+            if isinstance(m, ToolMessage):
+                transformed_messages.append(SystemMessage(content=(
+                    f"The Tool '{m.name}' replied with:" + "\n" + str(m.content)
+                )))
+            elif isinstance(m, AIMessage) and m.tool_calls:
+                calls = [
+                    {"tool": call['name'], "tool_input": call['args']}
+                    for call in m.tool_calls
+                ]
+                # A single call is emitted as an object, several calls as a list.
+                payload = calls[0] if len(calls) == 1 else calls
+                transformed_messages.append(AIMessage(content=json.dumps(payload)))
+            else:
+                # Also covers AIMessages without tool calls, which used to be
+                # dropped from the history by the previous if/elif nesting.
+                transformed_messages.append(m)
+
+        return [ system_message ] + transformed_messages
+
+    @property
+    def _llm_type(self) -> str:
+        """Identifier of this chat model variant."""
+        return "ollama_functions_t2s"
--- a/libs/run_tests.py
+++ b/libs/run_tests.py
@@ -79,8 +79,8 @@ def run_tests(models: dict[int, Model], seeds: list[int], tests: dict[int, Test]
                        'technique_name': technique.name,
                    })
                    
-                    # if hash_key == "DE3D137E":
-                        # pass
+                    if hash_key == "0DEB2030":
+                        pass

                    if hash_key not in saved_results.keys():
                        try: 
@@ -105,7 +105,7 @@ def run_tests(models: dict[int, Model], seeds: list[int], tests: dict[int, Test]
                                  "\033[0;35m)\033[0m",
                                  end=""
                            )
-                            answer = test.runnable(model=model, seed=seed, test=test, base_url=base_url)
+                            answer = test.runnable(model=model, seed=seed, test=test, technique=technique, base_url=base_url)
                            if isinstance(answer, str): 
                                combination['answer'] = answer
                                # combination['tool_calls'] = [] # no entry
@@ -172,13 +172,15 @@ def run_tests(models: dict[int, Model], seeds: list[int], tests: dict[int, Test]
        
        try:
            entry = {
-                'test_name':  result['test_name'],
-                'test_id':    result['test_id'],
-                'model_name': result['model_name'],
-                'model_id':   result['model_id'],
-                'seed':       result['seed'],
-                'answer':     result['answer'],
-                'validation': result['test'].validator(test=result['test'], answer=result['answer'], base_url=base_url),
+                'test_name':      result['test_name'],
+                'test_id':        result['test_id'],
+                'model_name':     result['model_name'],
+                'model_id':       result['model_id'],
+                'technique_name': result['technique_name'],
+                'technique_id':   result['technique_id'],
+                'seed':           result['seed'],
+                'answer':         result['answer'],
+                'validation':     result['test'].validator(test=result['test'], answer=result['answer'], base_url=base_url),
            }
        except Exception as e:
            print("\033[0;31mError validating entry (\033[0m" + hash_key + "\033[0;31m). <\033[0m" + str(e) + "\033[0;31m> Continuing...\033[0m                      ")
--- a/libs/runnables.py
+++ b/libs/runnables.py
@@ -1,8 +1,8 @@
 from types import NoneType
 from langchain_ollama.chat_models import ChatOllama
-from libs.ollama_functions import OllamaFunctions
+from libs.ollama_functions import OllamaFunctionsLSM, OllamaFunctionsT2S
 from langchain_core.messages import AIMessage, SystemMessage, HumanMessage, ToolMessage
-from libs.classes import Test, Model
+from libs.classes import Technique, Test, Model
 from langchain.tools import Tool
 from typing import Literal

@@ -10,22 +10,31 @@ from langgraph.graph import StateGraph, MessagesState
 import json
 from pydantic import ValidationError

-def _get_llm(model: Model, base_url: str, seed: int, tools: list[Tool]|NoneType = None):
-    if model.supports_tools:
+from suite_settings.techniques import techniques
+
+def _get_llm(model: Model, base_url: str, seed: int, technique: Technique, tools: list[Tool]|NoneType = None):
+    if technique == techniques[1]: # Native
        llm = ChatOllama(
            model=model.identifier,
            seed=seed,
            base_url=base_url
        )
-    else:
-        llm = OllamaFunctions(
+    elif technique == techniques[903]: # Long System Message
+        llm = OllamaFunctionsLSM(
            model=model.identifier,
            seed=seed,
            base_url=base_url,
            format="json",
-            max_tool_call_fails=3,
-            temperature=0.0
        )
+    elif technique == techniques[572]: # ToolMessages to SystemMessages
+        llm = OllamaFunctionsT2S(
+            model=model.identifier,
+            seed=seed,
+            base_url=base_url,
+            format="json",
+        )
+    else:
+        raise ValueError("Unkown Technique in _get_llm()")

    if tools:
        llm = llm.bind_tools(tools=tools)
@@ -33,7 +42,7 @@ def _get_llm(model: Model, base_url: str, seed: int, tools: list[Tool]|NoneType
    return llm


-def basic_prompt(model: Model, seed: int, test: Test, base_url: str) -> str:
+def basic_prompt(model: Model, seed: int, test: Test, technique: Technique, base_url: str) -> str:

    messages = [SystemMessage(test.runnable_input['system_msg'])]
    try: 
@@ -42,20 +51,20 @@ def basic_prompt(model: Model, seed: int, test: Test, base_url: str) -> str:
        pass 
    messages += [ HumanMessage(test.runnable_input['human_msg']) ]

-    llm = _get_llm(model=model, base_url=base_url, seed=seed)
+    llm = _get_llm(model=model, base_url=base_url, technique=technique, seed=seed)
    ai_msg = llm.invoke(messages)
    assert isinstance(ai_msg.content, str)
    return ai_msg.content



-def one_tool_call_answer(model: Model, seed: int, test: Test, base_url: str) -> dict:
+def one_tool_call_answer(model: Model, seed: int, test: Test, technique: Technique, base_url: str) -> dict:

    tools_dict = test.runnable_input['tools']
    tools = []
    for key in tools_dict:
        tools.append(tools_dict[key])
-    llm = _get_llm(model=model, base_url=base_url, seed=seed, tools=tools)
+    llm = _get_llm(model=model, base_url=base_url, seed=seed, technique=technique, tools=tools)

    messages = [SystemMessage(test.runnable_input['system_msg'])]
    try: 
@@ -108,7 +117,7 @@ def one_tool_call_answer(model: Model, seed: int, test: Test, base_url: str) ->
        "tool_calls": tool_calls,
    }

-def agent_with_tools(model: Model, seed: int, test: Test, base_url: str) -> dict[str, str|list]:
+def agent_with_tools(model: Model, seed: int, test: Test, technique: Technique, base_url: str) -> dict[str, str|list]:
    
    tool_calls = []
    index = -1
@@ -173,7 +182,7 @@ def agent_with_tools(model: Model, seed: int, test: Test, base_url: str) -> dict
    for key in tools_dict:
        tools.append(tools_dict[key])
    tool_node = NxToolNode(tools)
-    llm = _get_llm(model=model, base_url=base_url, seed=seed, tools=tools)
+    llm = _get_llm(model=model, base_url=base_url, seed=seed, technique=technique, tools=tools)

    workflow = StateGraph(MessagesState)

--- a/libs/tools.py
+++ b/libs/tools.py
@@ -141,6 +141,9 @@ def get_notes_containing(patterns: Union[list[str], str]) -> str:
        ret += f"{datetime.strftime(entry.time, '%Y/%m/%d %H:%M')} {entry.content}"
        is_first = False

+    if ret == "":
+        ret = "No matching notes were found. Try diffrent patterns."
+
    return ret
                               
@tool
--- a/print_saved_results.py
+++ b/print_saved_results.py
@@ -1,96 +0,0 @@
-import json
-import os
-import sys
-
-def print_help():
-    print("""Example usages: 
-
-python print_saved_results.py
-python print_saved_results.py -m llama3.1
-python print_saved_results.py -m llama3.1,mixtral-nemo:12b
-python print_saved_results.py -m llama3.1 -s 2222,2 -t "Healthy Vegetables in Chinese"
-
-Note: If one of the "fileters" does not exist, no error is thrown.""")
-
-def main(argv: list[str]) -> None:
-    try:
-        with open("./saved_results.json", "r") as f:
-            saved_results = json.load(fp=f)
-    except: 
-        print("saved_results.json not found. Try running test_suite.py first.")
-        exit(1)
-
-    if "-h" in argv:
-        print_help()
-        exit(0)
-
-    try:
-        if "-m" in argv:
-            test_str = argv[argv.index("-m")+1]
-            assert test_str[0] != "-"
-            models = test_str.split(",")
-            argv.pop(argv.index("-m")+1)
-            argv.pop(argv.index("-m"))
-        else: 
-            models = None
-
-        if "-s" in argv:
-            test_str = argv[argv.index("-s")+1]
-            assert test_str[0] != "-"
-            seeds = test_str.split(",")
-            argv.pop(argv.index("-s")+1)
-            argv.pop(argv.index("-s"))
-        else: 
-            seeds = None
-
-        if "-t" in argv:
-            test_str = argv[argv.index("-t")+1]
-            assert test_str[0] != "-"
-            tests = test_str.split(",")
-            argv.pop(argv.index("-t")+1)
-            argv.pop(argv.index("-t"))
-        else: 
-            tests = None
-    except:
-        print("Syntax error. Run `python print_saved_results.py -h` for help.")
-        print_help()
-        exit(1)
-        
-    argv.pop(0) # remove filename entry
-    if argv != []:
-        print("Syntax error. Run `python print_saved_results.py -h` for help.")
-        print(f"Got unkown argument{'s' if len(argv) != 1 else ''}: {argv}")
-        print_help()
-        exit(1)
-    
-        
-    first_print = True
-    term_size = os.get_terminal_size()
-
-    for hash_key in saved_results:
-        result = saved_results[hash_key]
-        if models ==  None or result['model'] in models:
-            if seeds ==  None or str(result['seed']) in seeds:
-                if tests ==  None or result['test_name'] in tests:
-                    if not first_print: print('-' * term_size.columns)
-
-                    print(
-                        "\n" +
-                        "\033[0;36mTest name:\033[0m " + 
-                        result['test_name'] + 
-                        "\n\033[0;36mModel:\033[0m " +
-                        result['model'] +
-                        "\n\033[0;36mSeed:\033[0m " +
-                        str(result['seed']) + 
-                        "\n\033[0;36mValidation result:\033[0m " +
-                        str(result['validation']) +
-                        "\n\033[0;36mAnswer: »\033[0m" + 
-                        result['answer'] + 
-                        "\033[0;36m«\033[0m" + 
-                        "\n" 
-                    )
-
-                    first_print = False
-
-if __name__ == "__main__":
-    main(argv=sys.argv)
--- a/saved_results-bak.json
+++ b/saved_results-bak.json
--- a/suite_settings/models.py
+++ b/suite_settings/models.py
@@ -1,90 +1,90 @@
 from libs.classes import Model

 models = {
-    245: Model(
-        display_name="llama3.1 8b", 
-        identifier="llama3.1",
-        supports_tools=True,
-        parameter_count_in_b=8
-    ),
-    238: Model(
-        display_name="llama3.1 70b",
-        identifier="llama3.1:70b",
-        supports_tools=True,
-        parameter_count_in_b=70
-    ),
-    120: Model(
-        display_name="llama3 groq TU 8b", 
-        identifier="llama3-groq-tool-use",
-        supports_tools=True,
-        parameter_count_in_b=8
-    ),
-    890: Model(
-        display_name="llama3 groq TU 70b",
-        identifier="llama3-groq-tool-use:70b",
-        supports_tools=True,
-        parameter_count_in_b=70
-    ),
-    348: Model(
-        display_name="Mixtral MoE 8x7b",
-        identifier="mixtral:8x7b",
-        supports_tools=False,
-        parameter_count_in_b=13,
-    ),
-    789: Model(
-        display_name="Mixtral MoE 8x22b",
-        identifier="mixtral:8x22b",
-        supports_tools=True,
-        parameter_count_in_b=39
-    ),
-    445: Model(
-        display_name="Gemma2 2b",
-        identifier="gemma2:2b",
-        supports_tools=False,
-        parameter_count_in_b=2
-    ),
-    475: Model(
-        display_name="Gemma2 9b",
-        identifier="gemma2:2b",
-        supports_tools=False,
-        parameter_count_in_b=9
-    ),
-    626: Model(
-        display_name="Gemma2 27b",
-        identifier="gemma2:2b",
-        supports_tools=False,
-        parameter_count_in_b=27
-    ),
-    229: Model(
-        display_name="Phi3 3.8b", 
-        identifier="phi3",
-        supports_tools=False,
-        parameter_count_in_b=3.8
-    ),
-    903: Model(
-        display_name="Tinyllama 1.1b",
-        identifier="tinyllama:1.1b",
-        supports_tools=False,
-        parameter_count_in_b=1.1
-    ),
-    670: Model(
-        display_name="Mistral Nemo 12b",
-        identifier="mistral-nemo:12b",
-        supports_tools=True,
-        parameter_count_in_b=12
-    ),
-    404: Model(
-        display_name="Command R+ 104b",
-        identifier="command-r-plus:104b",
-        supports_tools=True,
-        parameter_count_in_b=104
-    ),
-    701: Model(
-        display_name="Yi 6b",
-        identifier="yi:6b",
-        supports_tools=False,
-        parameter_count_in_b=6
-    ),
+#     245: Model(
+#         display_name="llama3.1 8b", 
+#         identifier="llama3.1",
+#         supports_tools=True,
+#         parameter_count_in_b=8
+#     ),
+#     238: Model(
+#         display_name="llama3.1 70b",
+#         identifier="llama3.1:70b",
+#         supports_tools=True,
+#         parameter_count_in_b=70
+#     ),
+#     120: Model(
+#         display_name="llama3 groq TU 8b", 
+#         identifier="llama3-groq-tool-use",
+#         supports_tools=True,
+#         parameter_count_in_b=8
+#     ),
+#     890: Model(
+#         display_name="llama3 groq TU 70b",
+#         identifier="llama3-groq-tool-use:70b",
+#         supports_tools=True,
+#         parameter_count_in_b=70
+#     ),
+#     348: Model(
+#         display_name="Mixtral MoE 8x7b",
+#         identifier="mixtral:8x7b",
+#         supports_tools=False,
+#         parameter_count_in_b=13,
+#     ),
+#     789: Model(
+#         display_name="Mixtral MoE 8x22b",
+#         identifier="mixtral:8x22b",
+#         supports_tools=True,
+#         parameter_count_in_b=39
+#     ),
+#     445: Model(
+#         display_name="Gemma2 2b",
+#         identifier="gemma2:2b",
+#         supports_tools=False,
+#         parameter_count_in_b=2
+#     ),
+#     475: Model(
+#         display_name="Gemma2 9b",
+#         identifier="gemma2:2b",
+#         supports_tools=False,
+#         parameter_count_in_b=9
+#     ),
+#     626: Model(
+#         display_name="Gemma2 27b",
+#         identifier="gemma2:2b",
+#         supports_tools=False,
+#         parameter_count_in_b=27
+#     ),
+#     229: Model(
+#         display_name="Phi3 3.8b", 
+#         identifier="phi3",
+#         supports_tools=False,
+#         parameter_count_in_b=3.8
+#     ),
+#     903: Model(
+#         display_name="Tinyllama 1.1b",
+#         identifier="tinyllama:1.1b",
+#         supports_tools=False,
+#         parameter_count_in_b=1.1
+#     ),
+#     670: Model(
+#         display_name="Mistral Nemo 12b",
+#         identifier="mistral-nemo:12b",
+#         supports_tools=True,
+#         parameter_count_in_b=12
+#     ),
+#     404: Model(
+#         display_name="Command R+ 104b",
+#         identifier="command-r-plus:104b",
+#         supports_tools=True,
+#         parameter_count_in_b=104
+#     ),
+#     701: Model(
+#         display_name="Yi 6b",
+#         identifier="yi:6b",
+#         supports_tools=False,
+#         parameter_count_in_b=6
+#     ),
    704: Model(
        display_name="Yi 9b",
        identifier="yi:9b",
--- a/suite_settings/techniques.py
+++ b/suite_settings/techniques.py
@@ -1,19 +1,19 @@
 from libs.classes import Technique

 techniques = {
-    190: Technique(
+    1: Technique(
        name="Native",
        for_supports_tools=True,
        for_not_supports_tools=False,
    ),
    903: Technique(
-        name="Long System Message",
+        name="LSM", # Long System Message
+        for_supports_tools=False,
+        for_not_supports_tools=True,
+    ),
+    572: Technique(
+        name="T2S", # Tool to System Messages
        for_supports_tools=False,
        for_not_supports_tools=True,
    ),
-    # 572: Technique(
-    #     name="Tool to System Messsages",
-    #     for_supports_tools=False,
-    #     for_not_supports_tools=True,
-    # ),
 }
--- a/suite_settings/tests.py
+++ b/suite_settings/tests.py
@@ -1,160 +1,150 @@
-from libs.classes    import Test
-from libs.runnables  import basic_prompt, one_tool_call_answer, agent_with_tools
-from libs.validators import regex_match_any, system_human_answer_match 
-from libs.tools      import add, multiply, get_current_date_and_time, get_notes_in_timespan, get_notes_containing, write_note
-from textwrap        import dedent
+from libs.classes import Test
+from libs.runnables import basic_prompt, one_tool_call_answer, agent_with_tools
+from libs.validators import regex_match_any, system_human_answer_match
+from libs.tools import add, multiply, get_current_date_and_time, get_notes_in_timespan, get_notes_containing, write_note
+from textwrap import dedent
 from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage, AIMessage

 tests = {
-	607: Test(
-	    name="Healthy Vegetables in Chinese",
-	    runnable=basic_prompt,
-	    runnable_input={
-	        "system_msg": "You are a helpful assistant. You serve people across the globe.",
-	        "human_msg": "什么蔬菜最健康？",
-	    },
-  	    validator=system_human_answer_match,
-	    validation_input={
-  	    "criteria": dedent("""- in Mandarin Chinese from front to finnish
-			- factually correct
-			- about healthy vegetables
-			- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes)
-
-			Again, the message has to be entirely in Manadarin Chineese.
-			That means If the answer is not in Chinese the answer is NOT correct! Only if the message in in Chinese rate as correct"""),
-	    }
-  	),
-    693: Test(
-	    name="Simple Multiplication",
-	    runnable=one_tool_call_answer,
-	    runnable_input={
-	        "system_msg": "You are a helpful assistant.",
-	        "human_msg": "What is 234215 times 143243?",
-	        "tools": {
-	            "add": add,
-	            "multiply": multiply
-	        }
-	    },
-        validator=regex_match_any,
-	    validation_input={
-	        "patterns": ["33549659245", "33,549,659,245", "33.549.659.245"]
-	        # "patterns": ["3[,\. ]?3[,\. ]?5[,\. ]?4[,\. ]?9[,\. ]?6[,\. ]?5[,\. ]?9[,\. ]?2[,\. ]?4[,\. ]?5"] # Would accept 3.354.965.9245
-	    }
-    ),
-	120: Test(
-	    name="Complex Multiplication",
-	    runnable=agent_with_tools,
-	    runnable_input={
-	        "system_msg": 'You are a helpful assistant. You can use tools to accomplish the task. Once you\'ve called a tool. the resulting tool_message content can be taken into consideration again. With that you can do "multiple rounds" of tool calling.',
-	        "human_msg": "What is 235 times 1243 times 21?",
-	        "tools": {
-	            "add": add,
-	            "multiply": multiply
-	        }
-	    },
-    	validator=regex_match_any,
-	    validation_input={
-	        "patterns": [ "6134205", "6.134.205", "6,134,205" ]
-	    }
-  ),
-  283: Test(
-	    name="Notes from last Saturday",
-	    runnable=agent_with_tools,
-	    runnable_input={
-	        "system_msg": "You are a helpful assistant. You can use tools to accomplish tasks. Once you've called a tool, the resulting tool_message content can be taken into consideration again. With that you can do \"multiple rounds\" of tool calling. To know the date, use the tool get_current_date_and_time.",
-	        "human_msg": "Last Saturday, who did grandma want me to call?",
-	        "tools": {
-	            "get_current_date_and_time": get_current_date_and_time,
-	            "get_notes_in_timespan": get_notes_in_timespan,
-	            "get_notes_containing": get_notes_containing,
-	            "Write note": write_note,
-	        }
-	    },
-	    validator=system_human_answer_match,
-	    validation_input={
-  		    "criteria": dedent("""- containing the information that the Human should call Wolfgang 
-			- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
-	    }
-    ),
-  260: Test(
-	    name="Notes from last Saturday TSO", # time span only
-	    runnable=agent_with_tools,
-	    runnable_input={
-	        "system_msg": "You are a helpful assistant. You can use tools to accomplish tasks. Once you've called a tool, the resulting tool_message content can be taken into consideration again. With that you can do \"multiple rounds\" of tool calling. To know the date, use the tool get_current_date_and_time.",
-	        "human_msg": "Last Saturday, who did grandma want me to call?",
-	        "tools": {
-	            "get_current_date_and_time": get_current_date_and_time,
-	            "get_notes_in_timespan": get_notes_in_timespan,
-	            "Write note": write_note
-	        }
-	    },
-      	validator=system_human_answer_match,
-	    validation_input={
-  		    "criteria": dedent("""- containing the information that the Human should call Wolfgang 
-			- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
-	    }
-	),
-	856: Test(
-	    name="Notes from last Saturday TSO FSP", 
-	    runnable=agent_with_tools,
-	    runnable_input={
-		    "system_msg": "You are a helpful assistant. You can use tools to accomplish tasks. Once you've called a tool, the resulting tool_message content can be taken into consideration again. With that you can do \"multiple rounds\" of tool calling. To know the date, use the tool get_current_date_and_time.",
-	        "fsp_messages": [
-				HumanMessage("Tomorrow is the anniversary! Any tips what I should by her?"), # One year ago
-			    AIMessage("", tool_calls=[{"name": "get_current_date_and_time", "args": {}, "id": "11" }]),
-			    ToolMessage("Wednesday the 31st of Juli 2024 09:31", tool_call_id="11" ),
-			    AIMessage("", tool_calls=[{"name": "get_notes_in_timespan", "args": {"begin": "2023/08/01", "to": "2023/08/01"}, "id": "12"}], ),
-			    ToolMessage("2023/08/01 23:10 Went out with Charlotte for our anniversary. Pizza at Cavalinos. She loved the rose necklace!", tool_call_id="12"),
-			    AIMessage("I'm afraid I cannot be of great help, since I obviously know charlotte way less than you, but last year you two went out to Cavalinons and you got her a rose necklace as a present. And she liked it. So maybe a pair of earrings would be something she'd like?", name="example_assistant", ),
-
-				HumanMessage("Did I write down anything yesterday or the day before that?"),
-			    AIMessage("", tool_calls=[{"name": "get_current_date_and_time", "args": {}, "id": "21" }], ),
-			    ToolMessage("Wednesday the 7th of August 2024 16:23", tool_call_id="21" ),
-			    AIMessage("", tool_calls=[{"name": "get_notes_in_timespan", "args": {"begin": "2024/08/05", "to": "2024/08/06"}, "id": "22"}], ),
-			    ToolMessage("2024/08/05 11:45 Ask Dr. Mills about the side effects of the new medication he got me.\n\n2024/08/06 18:30 Pick up the dry cleaning on Thursday; they close early on Fridays.", tool_call_id="22"),
-			    AIMessage("Yes. I found two entries.\n- From yesterday stating that you wanted to pickup the dry cleaning on Thursday, because they close early on Fridays\n- From Monday a note saying that you want to ask Dr. Mills about the side effects of the new medication he got you.", name="example_assistant", ),
-	        ],
-			"human_msg": "Last Saturday, who did grandma want me to call?",
-	        "tools": {
-	            "get_current_date_and_time": get_current_date_and_time,
-	            "get_notes_in_timespan": get_notes_in_timespan,
-	            "Write note": write_note
-	        }
-	    },
+    607: Test(
+        name="Healthy Vegetables in Chinese",
+        runnable=basic_prompt,
+        runnable_input={
+            "system_msg": "You are a helpful assistant. You serve people across the globe.",
+            "human_msg": "什么蔬菜最健康？",
+        },
        validator=system_human_answer_match,
-	    validation_input={
-	  	    "criteria": dedent("""- containing the information that the Human should call Wolfgang 
-			- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
-	    }
- ),
-	
-	# 363: Test(),
-	# 600: Test(),
-	# 221: Test(),
-	# 985: Test(),
-	# 634: Test(),
-	# 927: Test(),
-	# 346: Test(),
-	# 995: Test(),
-	# 404: Test(),
-	# 299: Test(),
-	# 275: Test(),
-	# 852: Test(),
-	# 376: Test(),
-	# 263: Test(),
-	# 432: Test(),
-	# 270: Test(),
-	# 797: Test(),
-	# 340: Test(),
-	# 489: Test(),
-	# 786: Test(),
-	# 121: Test(),
-	# 971: Test(),
-	# 436: Test(),
-	# 147: Test(),
-	# 534: Test(),
-	# 190: Test(),
-	# 158: Test(),
-	# 191: Test(),
-}
+        validation_input={
+            "criteria": dedent("""- in Mandarin Chinese from front to finnish
+            - factually correct
+            - about healthy vegetables
+            - just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes)

+            Again, the message has to be entirely in Manadarin Chineese.
+            That means If the answer is not in Chinese the answer is NOT correct! Only if the message in in Chinese rate as correct"""),
+        },
+    ),
+    693: Test(
+        name="Simple Multiplication",
+        runnable=one_tool_call_answer,
+        runnable_input={
+            "system_msg": "You are a helpful assistant.",
+            "human_msg": "What is 234215 times 143243?",
+            "tools": {"add": add, "multiply": multiply},
+        },
+        validator=regex_match_any,
+        validation_input={
+            "patterns": ["33549659245", "33,549,659,245", "33.549.659.245"]
+            # "patterns": ["3[,\. ]?3[,\. ]?5[,\. ]?4[,\. ]?9[,\. ]?6[,\. ]?5[,\. ]?9[,\. ]?2[,\. ]?4[,\. ]?5"] # Would accept 3.354.965.9245
+        },
+    ),
+    120: Test(
+        name="Complex Multiplication",
+        runnable=agent_with_tools,
+        runnable_input={
+            "system_msg": 'You are a helpful assistant. You can use tools to accomplish the task. Once you\'ve called a tool. the resulting tool_message content can be taken into consideration again. With that you can do "multiple rounds" of tool calling.',
+            "human_msg": "What is 235 times 1243 times 21?",
+            "tools": {"add": add, "multiply": multiply},
+        },
+        validator=regex_match_any,
+        validation_input={"patterns": ["6134205", "6.134.205", "6,134,205"]},
+    ),
+    283: Test(
+        name="Notes from last Saturday",
+        runnable=agent_with_tools,
+        runnable_input={
+            "system_msg": 'You are a helpful assistant. You can use tools to accomplish tasks. Once you\'ve called a tool, the resulting tool_message content can be taken into consideration again. With that you can do "multiple rounds" of tool calling. To know the date, use the tool get_current_date_and_time.',
+            "human_msg": "Last Saturday, who did grandma want me to call?",
+            "tools": {
+                "get_current_date_and_time": get_current_date_and_time,
+                "get_notes_in_timespan": get_notes_in_timespan,
+                "get_notes_containing": get_notes_containing,
+                "Write note": write_note,
+            },
+        },
+        validator=system_human_answer_match,
+        validation_input={
+            "criteria": dedent("""- containing the information that the Human should call Wolfgang 
+    - just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
+        },
+    ),
+    260: Test(
+        name="Notes from last Saturday TSO",  # time span only
+        runnable=agent_with_tools,
+        runnable_input={
+            "system_msg": 'You are a helpful assistant. You can use tools to accomplish tasks. Once you\'ve called a tool, the resulting tool_message content can be taken into consideration again. With that you can do "multiple rounds" of tool calling. To know the date, use the tool get_current_date_and_time.',
+            "human_msg": "Last Saturday, who did grandma want me to call?",
+            "tools": {
+                "get_current_date_and_time": get_current_date_and_time,
+                "get_notes_in_timespan": get_notes_in_timespan,
+                "Write note": write_note,
+            },
+        },
+        validator=system_human_answer_match,
+        validation_input={
+            "criteria": dedent("""- containing the information that the Human should call Wolfgang 
+    - just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
+        },
+    ),
+    856: Test(
+        name="Notes from last Saturday TSO FSP",
+        runnable=agent_with_tools,
+        runnable_input={
+            "system_msg": 'You are a helpful assistant. You can use tools to accomplish tasks. Once you\'ve called a tool, the resulting tool_message content can be taken into consideration again. With that you can do "multiple rounds" of tool calling. To know the date, use the tool get_current_date_and_time.',
+            "fsp_messages": [
+                HumanMessage("Tomorrow is the anniversary! Any tips what I should by her?"),  # One year ago
+                AIMessage("", tool_calls=[{"name": "get_current_date_and_time", "args": {}, "id": "11"}]),
+                ToolMessage("Wednesday the 31st of Juli 2024 09:31", tool_call_id="11"),
+                AIMessage("", tool_calls=[ { "name": "get_notes_in_timespan", "args": {"begin": "2023/08/01", "to": "2023/08/01"}, "id": "12", } ]),
+                ToolMessage("2023/08/01 23:10 Went out with Charlotte for our anniversary. Pizza at Cavalinos. She loved the rose necklace!", tool_call_id="12"),
+                AIMessage("I'm afraid I cannot be of great help, since I obviously know charlotte way less than you, but last year you two went out to Cavalinons and you got her a rose necklace as a present. And she liked it. So maybe a pair of earrings would be something she'd like?", name="example_assistant"),
+
+                HumanMessage("Did I write down anything yesterday or the day before that?"),
+                AIMessage( "", tool_calls=[{"name": "get_current_date_and_time", "args": {}, "id": "21"}]),
+                ToolMessage("Wednesday the 7th of August 2024 16:23", tool_call_id="21"),
+                AIMessage( "", tool_calls=[ { "name": "get_notes_in_timespan", "args": {"begin": "2024/08/05", "to": "2024/08/06"}, "id": "22"}]),
+                ToolMessage( "2024/08/05 11:45 Ask Dr. Mills about the side effects of the new medication he got me.\n\n2024/08/06 18:30 Pick up the dry cleaning on Thursday; they close early on Fridays.", tool_call_id="22"),
+                AIMessage( "Yes. I found two entries.\n- From yesterday stating that you wanted to pickup the dry cleaning on Thursday, because they close early on Fridays\n- From Monday a note saying that you want to ask Dr. Mills about the side effects of the new medication he got you.", name="example_assistant"),
+            ],
+            "human_msg": "Last Saturday, who did grandma want me to call?",
+            "tools": {
+                "get_current_date_and_time": get_current_date_and_time,
+                "get_notes_in_timespan": get_notes_in_timespan,
+                "Write note": write_note,
+            },
+        },
+        validator=system_human_answer_match,
+        validation_input={
+            "criteria": dedent("""- containing the information that the Human should call Wolfgang 
+            - just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
+        },
+    ),
+    # 363: Test(),
+    # 600: Test(),
+    # 221: Test(),
+    # 985: Test(),
+    # 634: Test(),
+    # 927: Test(),
+    # 346: Test(),
+    # 995: Test(),
+    # 404: Test(),
+    # 299: Test(),
+    # 275: Test(),
+    # 852: Test(),
+    # 376: Test(),
+    # 263: Test(),
+    # 432: Test(),
+    # 270: Test(),
+    # 797: Test(),
+    # 340: Test(),
+    # 489: Test(),
+    # 786: Test(),
+    # 121: Test(),
+    # 971: Test(),
+    # 436: Test(),
+    # 147: Test(),
+    # 534: Test(),
+    # 190: Test(),
+    # 158: Test(),
+    # 191: Test(),
+}
				`@@ -1 +0,0 @@`
				`/nix/store/j2vf461mp9h2y9awkklbfawf3dz7cs1p-nix-shell-env`