mega commit
This commit is contained in:
@@ -1,13 +1,112 @@
|
||||
# Model identifiers to run. Entries that are commented out are disabled
# but kept for reference so they can be re-enabled quickly.
models = [
    "llama3.1",  # 8b
    "llama3.1:70b",
    "llama3-groq-tool-use",  # latest
    "llama3-groq-tool-use:70b",
    # "mixtral:8x7b",
    "mixtral:8x22b",
    # "gemma2:2b",
    # "phi3",  # 3.8b
    # "tinyllama:1.1b",
    "mistral-nemo:12b",
    "command-r-plus:104b",
]
|
||||
from libs.classes import Model
|
||||
|
||||
# Registry of the models under test, keyed by a numeric ID.
#
# Each entry records a human-readable display name, the identifier string
# for the model, whether the model supports tool calling, and its parameter
# count in billions.
#
# NOTE(review): the numeric keys look arbitrary; presumably they are stable
# IDs referenced from elsewhere -- do not renumber without checking callers.
models = {
    245: Model(
        display_name="llama3.1 8b",
        identifier="llama3.1",
        supports_tools=True,
        parameter_count_in_b=8,
    ),
    238: Model(
        display_name="llama3.1 70b",
        identifier="llama3.1:70b",
        supports_tools=True,
        parameter_count_in_b=70,
    ),
    120: Model(
        display_name="llama3 groq TU 8b",
        identifier="llama3-groq-tool-use",
        supports_tools=True,
        parameter_count_in_b=8,
    ),
    890: Model(
        display_name="llama3 groq TU 70b",
        identifier="llama3-groq-tool-use:70b",
        supports_tools=True,
        parameter_count_in_b=70,
    ),
    # NOTE(review): 13 and 39 for the Mixtral MoE models appear to be
    # active-parameter counts rather than totals -- confirm this is intended.
    348: Model(
        display_name="Mixtral MoE 8x7b",
        identifier="mixtral:8x7b",
        supports_tools=False,
        parameter_count_in_b=13,
    ),
    789: Model(
        display_name="Mixtral MoE 8x22b",
        identifier="mixtral:8x22b",
        supports_tools=True,
        parameter_count_in_b=39,
    ),
    445: Model(
        display_name="Gemma2 2b",
        identifier="gemma2:2b",
        supports_tools=False,
        parameter_count_in_b=2,
    ),
    # The 9b and 27b entries previously reused the "gemma2:2b" identifier,
    # so all three Gemma2 rows would have resolved to the 2b model.
    475: Model(
        display_name="Gemma2 9b",
        identifier="gemma2:9b",
        supports_tools=False,
        parameter_count_in_b=9,
    ),
    626: Model(
        display_name="Gemma2 27b",
        identifier="gemma2:27b",
        supports_tools=False,
        parameter_count_in_b=27,
    ),
    229: Model(
        display_name="Phi3 3.8b",
        identifier="phi3",
        supports_tools=False,
        parameter_count_in_b=3.8,
    ),
    903: Model(
        display_name="Tinyllama 1.1b",
        identifier="tinyllama:1.1b",
        supports_tools=False,
        parameter_count_in_b=1.1,
    ),
    670: Model(
        display_name="Mistral Nemo 12b",
        identifier="mistral-nemo:12b",
        supports_tools=True,
        parameter_count_in_b=12,
    ),
    404: Model(
        display_name="Command R+ 104b",
        identifier="command-r-plus:104b",
        supports_tools=True,
        parameter_count_in_b=104,
    ),
    # The Yi 6b identifier was "yi:7b", which contradicts the display name
    # and parameter count; corrected to "yi:6b".
    # NOTE(review): 701/704 and 724/129 are exact duplicates of each other.
    # Presumably one of each pair was meant to be a different variant --
    # confirm and either differentiate or remove. Kept as-is so that no
    # existing ID disappears.
    701: Model(
        display_name="Yi 6b",
        identifier="yi:6b",
        supports_tools=False,
        parameter_count_in_b=6,
    ),
    704: Model(
        display_name="Yi 6b",
        identifier="yi:6b",
        supports_tools=False,
        parameter_count_in_b=6,
    ),
    724: Model(
        display_name="Yi 34b",
        identifier="yi:34b",
        supports_tools=False,
        parameter_count_in_b=34,
    ),
    129: Model(
        display_name="Yi 34b",
        identifier="yi:34b",
        supports_tools=False,
        parameter_count_in_b=34,
    ),
    853: Model(
        display_name="Qwen2 0.5b",
        identifier="qwen2:0.5b",
        supports_tools=False,
        parameter_count_in_b=0.5,
    ),
}
|
||||
|
||||
@@ -1,21 +1,21 @@
|
||||
from libs.test_class import Test
|
||||
from libs.runnables import *
|
||||
from libs.validators import *
|
||||
from libs.tools import *
|
||||
from libs.classes import Test
|
||||
from libs.runnables import basic_prompt, one_tool_call_answer, agent_with_tools
|
||||
from libs.validators import regex_match_any, system_human_answer_match
|
||||
from libs.tools import add, multiply, get_current_date_and_time, get_notes_in_timespan, get_notes_containing, write_note
|
||||
from textwrap import dedent
|
||||
from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage, AIMessage
|
||||
|
||||
tests = {
|
||||
607: Test(
|
||||
name="Healthy Vegetables in Chinese",
|
||||
runnable=basic,
|
||||
runnable=basic_prompt,
|
||||
runnable_input={
|
||||
"system_msg": "You are a helpful assistant. You serve people across the globe.",
|
||||
"human_msg": "什么蔬菜最健康?",
|
||||
},
|
||||
validator=system_human_answer_match,
|
||||
validator=system_human_answer_match,
|
||||
validation_input={
|
||||
"criteria": dedent("""- in Mandarin Chinese from front to finnish
|
||||
"criteria": dedent("""- in Mandarin Chinese from front to finnish
|
||||
- factually correct
|
||||
- about healthy vegetables
|
||||
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes)
|
||||
@@ -23,7 +23,7 @@ tests = {
|
||||
Again, the message has to be entirely in Manadarin Chineese.
|
||||
That means If the answer is not in Chinese the answer is NOT correct! Only if the message in in Chinese rate as correct"""),
|
||||
}
|
||||
),
|
||||
),
|
||||
693: Test(
|
||||
name="Simple Multiplication",
|
||||
runnable=one_tool_call_answer,
|
||||
@@ -52,12 +52,12 @@ tests = {
|
||||
"multiply": multiply
|
||||
}
|
||||
},
|
||||
validator=regex_match_any,
|
||||
validator=regex_match_any,
|
||||
validation_input={
|
||||
"patterns": [ "6134205", "6.134.205", "6,134,205" ]
|
||||
}
|
||||
),
|
||||
283: Test(
|
||||
),
|
||||
283: Test(
|
||||
name="Notes from last Saturday",
|
||||
runnable=agent_with_tools,
|
||||
runnable_input={
|
||||
@@ -67,16 +67,16 @@ tests = {
|
||||
"get_current_date_and_time": get_current_date_and_time,
|
||||
"get_notes_in_timespan": get_notes_in_timespan,
|
||||
"get_notes_containing": get_notes_containing,
|
||||
"Write note": write_note
|
||||
"Write note": write_note,
|
||||
}
|
||||
},
|
||||
validator=system_human_answer_match,
|
||||
validator=system_human_answer_match,
|
||||
validation_input={
|
||||
"criteria": dedent("""- containing the information that the Human should call Wolfgang
|
||||
"criteria": dedent("""- containing the information that the Human should call Wolfgang
|
||||
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
|
||||
}
|
||||
),
|
||||
260: Test(
|
||||
260: Test(
|
||||
name="Notes from last Saturday TSO", # time span only
|
||||
runnable=agent_with_tools,
|
||||
runnable_input={
|
||||
@@ -88,15 +88,15 @@ tests = {
|
||||
"Write note": write_note
|
||||
}
|
||||
},
|
||||
validator=system_human_answer_match,
|
||||
validator=system_human_answer_match,
|
||||
validation_input={
|
||||
"criteria": dedent("""- containing the information that the Human should call Wolfgang
|
||||
"criteria": dedent("""- containing the information that the Human should call Wolfgang
|
||||
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
|
||||
}
|
||||
),
|
||||
),
|
||||
856: Test(
|
||||
name="Notes from last Saturday TSO FSP",
|
||||
runnable=agent_with_tools_fsp,
|
||||
runnable=agent_with_tools,
|
||||
runnable_input={
|
||||
"system_msg": "You are a helpful assistant. You can use tools to accomplish tasks. Once you've called a tool, the resulting tool_message content can be taken into consideration again. With that you can do \"multiple rounds\" of tool calling. To know the date, use the tool get_current_date_and_time.",
|
||||
"fsp_messages": [
|
||||
@@ -121,12 +121,12 @@ tests = {
|
||||
"Write note": write_note
|
||||
}
|
||||
},
|
||||
validator=system_human_answer_match,
|
||||
validator=system_human_answer_match,
|
||||
validation_input={
|
||||
"criteria": dedent("""- containing the information that the Human should call Wolfgang
|
||||
"criteria": dedent("""- containing the information that the Human should call Wolfgang
|
||||
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
|
||||
}
|
||||
),
|
||||
),
|
||||
# 363: Test(),
|
||||
# 600: Test(),
|
||||
# 221: Test(),
|
||||
|
||||
Reference in New Issue
Block a user