mega commit

This commit is contained in:
Lennart J. Kurzweg (Nx2)
2024-08-20 20:47:17 +02:00
parent 4860179a1c
commit a578dd26a0
13 changed files with 608 additions and 305 deletions

View File

@@ -1,13 +1,112 @@
# List of model identifier strings; the trailing comments note the size/tag
# of entries whose identifier does not spell it out.
# Entries that are commented out are not part of the list.
# NOTE(review): `models` is bound again further down as a dict of Model
# objects, which replaces this list — this looks like the pre-change side
# of the diff; confirm before relying on it.
models = [
"llama3.1", # 8b
"llama3.1:70b",
"llama3-groq-tool-use", # latest
"llama3-groq-tool-use:70b",
# "mixtral:8x7b",
"mixtral:8x22b",
# "gemma2:2b",
# "phi3", # 3.8b
# "tinyllama:1.1b",
"mistral-nemo:12b",
"command-r-plus:104b",
]
from libs.classes import Model
# Registry of the known models, keyed by an integer id.
#
# Each entry is a ``Model`` carrying:
#   display_name          - human-readable label
#   identifier            - model tag string (presumably the Ollama tag that
#                           is pulled/run — TODO confirm against the caller)
#   supports_tools        - whether the model is flagged as tool-capable
#   parameter_count_in_b  - model size in billions of parameters
#
# The integer keys are kept exactly as they were so that anything referring
# to a model by id keeps working.
models = {
    245: Model(
        display_name="llama3.1 8b",
        identifier="llama3.1",
        supports_tools=True,
        parameter_count_in_b=8,
    ),
    238: Model(
        display_name="llama3.1 70b",
        identifier="llama3.1:70b",
        supports_tools=True,
        parameter_count_in_b=70,
    ),
    120: Model(
        display_name="llama3 groq TU 8b",
        identifier="llama3-groq-tool-use",
        supports_tools=True,
        parameter_count_in_b=8,
    ),
    890: Model(
        display_name="llama3 groq TU 70b",
        identifier="llama3-groq-tool-use:70b",
        supports_tools=True,
        parameter_count_in_b=70,
    ),
    # NOTE(review): 13 and 39 for the two Mixtral entries look like the
    # active-per-token parameter counts rather than the totals — confirm
    # which one parameter_count_in_b is meant to hold.
    348: Model(
        display_name="Mixtral MoE 8x7b",
        identifier="mixtral:8x7b",
        supports_tools=False,
        parameter_count_in_b=13,
    ),
    789: Model(
        display_name="Mixtral MoE 8x22b",
        identifier="mixtral:8x22b",
        supports_tools=True,
        parameter_count_in_b=39,
    ),
    445: Model(
        display_name="Gemma2 2b",
        identifier="gemma2:2b",
        supports_tools=False,
        parameter_count_in_b=2,
    ),
    # Fixed: the 9b and 27b entries previously reused the "gemma2:2b"
    # identifier, so all three Gemma2 entries pointed at the same 2b model.
    475: Model(
        display_name="Gemma2 9b",
        identifier="gemma2:9b",
        supports_tools=False,
        parameter_count_in_b=9,
    ),
    626: Model(
        display_name="Gemma2 27b",
        identifier="gemma2:27b",
        supports_tools=False,
        parameter_count_in_b=27,
    ),
    229: Model(
        display_name="Phi3 3.8b",
        identifier="phi3",
        supports_tools=False,
        parameter_count_in_b=3.8,
    ),
    903: Model(
        display_name="Tinyllama 1.1b",
        identifier="tinyllama:1.1b",
        supports_tools=False,
        parameter_count_in_b=1.1,
    ),
    670: Model(
        display_name="Mistral Nemo 12b",
        identifier="mistral-nemo:12b",
        supports_tools=True,
        parameter_count_in_b=12,
    ),
    404: Model(
        display_name="Command R+ 104b",
        identifier="command-r-plus:104b",
        supports_tools=True,
        parameter_count_in_b=104,
    ),
    # Fixed: identifier was "yi:7b", which disagrees with the entry's own
    # display name and parameter count (6b).
    701: Model(
        display_name="Yi 6b",
        identifier="yi:6b",
        supports_tools=False,
        parameter_count_in_b=6,
    ),
    # NOTE(review): same fields as entry 701 — either a duplicate or meant
    # to be another Yi variant. Kept so the key 704 stays valid.
    704: Model(
        display_name="Yi 6b",
        identifier="yi:6b",
        supports_tools=False,
        parameter_count_in_b=6,
    ),
    724: Model(
        display_name="Yi 34b",
        identifier="yi:34b",
        supports_tools=False,
        parameter_count_in_b=34,
    ),
    # NOTE(review): same fields as entry 724 — duplicate? Kept so the key
    # 129 stays valid.
    129: Model(
        display_name="Yi 34b",
        identifier="yi:34b",
        supports_tools=False,
        parameter_count_in_b=34,
    ),
    853: Model(
        display_name="Qwen2 0.5b",
        identifier="qwen2:0.5b",
        supports_tools=False,
        parameter_count_in_b=0.5,
    ),
}

View File

@@ -1,21 +1,21 @@
from libs.test_class import Test
from libs.runnables import *
from libs.validators import *
from libs.tools import *
from libs.classes import Test
from libs.runnables import basic_prompt, one_tool_call_answer, agent_with_tools
from libs.validators import regex_match_any, system_human_answer_match
from libs.tools import add, multiply, get_current_date_and_time, get_notes_in_timespan, get_notes_containing, write_note
from textwrap import dedent
from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage, AIMessage
tests = {
607: Test(
name="Healthy Vegetables in Chinese",
runnable=basic,
runnable=basic_prompt,
runnable_input={
"system_msg": "You are a helpful assistant. You serve people across the globe.",
"human_msg": "什么蔬菜最健康?",
},
validator=system_human_answer_match,
validator=system_human_answer_match,
validation_input={
"criteria": dedent("""- in Mandarin Chinese from front to finnish
"criteria": dedent("""- in Mandarin Chinese from front to finnish
- factually correct
- about healthy vegetables
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes)
@@ -23,7 +23,7 @@ tests = {
Again, the message has to be entirely in Manadarin Chineese.
That means If the answer is not in Chinese the answer is NOT correct! Only if the message in in Chinese rate as correct"""),
}
),
),
693: Test(
name="Simple Multiplication",
runnable=one_tool_call_answer,
@@ -52,12 +52,12 @@ tests = {
"multiply": multiply
}
},
validator=regex_match_any,
validator=regex_match_any,
validation_input={
"patterns": [ "6134205", "6.134.205", "6,134,205" ]
}
),
283: Test(
),
283: Test(
name="Notes from last Saturday",
runnable=agent_with_tools,
runnable_input={
@@ -67,16 +67,16 @@ tests = {
"get_current_date_and_time": get_current_date_and_time,
"get_notes_in_timespan": get_notes_in_timespan,
"get_notes_containing": get_notes_containing,
"Write note": write_note
"Write note": write_note,
}
},
validator=system_human_answer_match,
validator=system_human_answer_match,
validation_input={
"criteria": dedent("""- containing the information that the Human should call Wolfgang
"criteria": dedent("""- containing the information that the Human should call Wolfgang
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
}
),
260: Test(
260: Test(
name="Notes from last Saturday TSO", # time span only
runnable=agent_with_tools,
runnable_input={
@@ -88,15 +88,15 @@ tests = {
"Write note": write_note
}
},
validator=system_human_answer_match,
validator=system_human_answer_match,
validation_input={
"criteria": dedent("""- containing the information that the Human should call Wolfgang
"criteria": dedent("""- containing the information that the Human should call Wolfgang
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
}
),
),
856: Test(
name="Notes from last Saturday TSO FSP",
runnable=agent_with_tools_fsp,
runnable=agent_with_tools,
runnable_input={
"system_msg": "You are a helpful assistant. You can use tools to accomplish tasks. Once you've called a tool, the resulting tool_message content can be taken into consideration again. With that you can do \"multiple rounds\" of tool calling. To know the date, use the tool get_current_date_and_time.",
"fsp_messages": [
@@ -121,12 +121,12 @@ tests = {
"Write note": write_note
}
},
validator=system_human_answer_match,
validator=system_human_answer_match,
validation_input={
"criteria": dedent("""- containing the information that the Human should call Wolfgang
"criteria": dedent("""- containing the information that the Human should call Wolfgang
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
}
),
),
# 363: Test(),
# 600: Test(),
# 221: Test(),