From 209c5850e9105d14153d99068952123378f884bf Mon Sep 17 00:00:00 2001 From: "Lennart J. Kurzweg (Nx2)" Date: Tue, 27 Aug 2024 17:36:51 +0200 Subject: [PATCH] values ++ --- suite_settings/models.py | 132 ++++++++--------------------------- suite_settings/seeds.py | 17 ++++- suite_settings/techniques.py | 4 +- 3 files changed, 48 insertions(+), 105 deletions(-) diff --git a/suite_settings/models.py b/suite_settings/models.py index 2a9b61b..bb26516 100644 --- a/suite_settings/models.py +++ b/suite_settings/models.py @@ -1,106 +1,34 @@ from libs.classes import Model models = { -# 245: Model( -# display_name="llama3.1 8b", -# identifier="llama3.1", -# supports_tools=True, -# parameter_count_in_b=8 -# ), -# 238: Model( -# display_name="llama3.1 70b", -# identifier="llama3.1:70b", -# supports_tools=True, -# parameter_count_in_b=70 -# ), -# 120: Model( -# display_name="llama3 groq TU 8b", -# identifier="llama3-groq-tool-use", -# supports_tools=True, -# parameter_count_in_b=8 -# ), -# 890: Model( -# display_name="llama3 groq TU 70b", -# identifier="llama3-groq-tool-use:70b", -# supports_tools=True, -# parameter_count_in_b=70 -# ), -# 348: Model( -# display_name="Mixtral MoE 8x7b", -# identifier="mixtral:8x7b", -# supports_tools=False, -# parameter_count_in_b=13, -# ), -# 789: Model( -# display_name="Mixtral MoE 8x22b", -# identifier="mixtral:8x22b", -# supports_tools=True, -# parameter_count_in_b=39 -# ), -# 445: Model( -# display_name="Gemma2 2b", -# identifier="gemma2:2b", -# supports_tools=False, -# parameter_count_in_b=2 -# ), -# 475: Model( -# display_name="Gemma2 9b", -# identifier="gemma2:2b", -# supports_tools=False, -# parameter_count_in_b=9 -# ), -# 626: Model( -# display_name="Gemma2 27b", -# identifier="gemma2:2b", -# supports_tools=False, -# parameter_count_in_b=27 -# ), -# 229: Model( -# display_name="Phi3 3.8b", -# identifier="phi3", -# supports_tools=False, -# parameter_count_in_b=3.8 -# ), -# 903: Model( -# display_name="Tinyllama 1.1b", -# identifier="tinyllama:1.1b", -# supports_tools=False, -# parameter_count_in_b=1.1 -# ), -# 670: Model( -# display_name="Mistral Nemo 12b", -# identifier="mistral-nemo:12b", -# supports_tools=True, -# parameter_count_in_b=12 -# ), -# 404: Model( -# display_name="Command R+ 104b", -# identifier="command-r-plus:104b", -# supports_tools=True, -# parameter_count_in_b=104 -# ), -# 701: Model( -# display_name="Yi 6b", -# identifier="yi:6b", -# supports_tools=False, -# parameter_count_in_b=6 -# ), - 704: Model( - display_name="Yi 9b", - identifier="yi:9b", - supports_tools=False, - parameter_count_in_b=6 - ), - 724: Model( - display_name="Yi 34b", - identifier="yi:34b", - supports_tools=False, - parameter_count_in_b=34 - ), - 853: Model( - display_name="Qwen2 0.5b", - identifier="qwen2:0.5b", - supports_tools=False, - parameter_count_in_b=0.5 - ), + 245: Model( display_name="llama3.1 8b", identifier="llama3.1:8b", supports_tools=True, parameter_count_in_b=8 ), + 238: Model( display_name="llama3.1 70b", identifier="llama3.1:70b", supports_tools=True, parameter_count_in_b=70 ), + 539: Model( display_name="llama3.1 405b", identifier="llama3.1:405b", supports_tools=True, parameter_count_in_b=405 ), + 120: Model( display_name="llama3 groq TU 8b", identifier="llama3-groq-tool-use:8b", supports_tools=True, parameter_count_in_b=8 ), + 890: Model( display_name="llama3 groq TU 70b", identifier="llama3-groq-tool-use:70b", supports_tools=True, parameter_count_in_b=70 ), + 639: Model( display_name="Mistral Large 123b", identifier="mistral-large:123b", supports_tools=True, parameter_count_in_b=123 ), + 982: Model( display_name="Mistral 0.3 7b", identifier="mistral:7b", supports_tools=True, parameter_count_in_b=7 ), + 670: Model( display_name="Mistral Nemo 12b", identifier="mistral-nemo:12b", supports_tools=True, parameter_count_in_b=12 ), + 348: Model( display_name="Mixtral MoE 8x7b", identifier="mixtral:8x7b", supports_tools=False, parameter_count_in_b=13, ), + 789: Model( display_name="Mixtral MoE 8x22b", identifier="mixtral:8x22b", supports_tools=True, parameter_count_in_b=39 ), + 453: Model( display_name="Zephyr 7b", identifier="zephyr:7b", supports_tools=False, parameter_count_in_b=7 ), + 445: Model( display_name="Gemma2 2b", identifier="gemma2:2b", supports_tools=False, parameter_count_in_b=2 ), + 475: Model( display_name="Gemma2 9b", identifier="gemma2:9b", supports_tools=False, parameter_count_in_b=9 ), + 626: Model( display_name="Gemma2 27b", identifier="gemma2:27b", supports_tools=False, parameter_count_in_b=27 ), + 229: Model( display_name="Phi3 3.8b", identifier="phi3:3.8b", supports_tools=False, parameter_count_in_b=3.8 ), + 329: Model( display_name="Phi3 14b", identifier="phi3:14b", supports_tools=False, parameter_count_in_b=14 ), + 251: Model( display_name="Phi3.5 3.8b", identifier="phi3.5:3.8b", supports_tools=False, parameter_count_in_b=3.8 ), + 903: Model( display_name="Tinyllama 1.1b", identifier="tinyllama:1.1b", supports_tools=False, parameter_count_in_b=1.1 ), + 409: Model( display_name="Command R 34b", identifier="command-r:34b", supports_tools=False, parameter_count_in_b=34 ), + 404: Model( display_name="Command R+ 104b", identifier="command-r-plus:104b", supports_tools=True, parameter_count_in_b=104 ), + 701: Model( display_name="Yi 6b", identifier="yi:6b", supports_tools=False, parameter_count_in_b=6 ), + 704: Model( display_name="Yi 9b", identifier="yi:9b", supports_tools=False, parameter_count_in_b=6 ), + 724: Model( display_name="Yi 34b", identifier="yi:34b", supports_tools=False, parameter_count_in_b=34 ), + 853: Model( display_name="Qwen2 0.5b", identifier="qwen2:0.5b", supports_tools=False, parameter_count_in_b=0.5 ), + 23: Model( display_name="Qwen2 1.5b", identifier="qwen2:1.5b", supports_tools=False, parameter_count_in_b=1.5 ), + 295: Model( display_name="Qwen2 7b", identifier="qwen2:7b", supports_tools=False, parameter_count_in_b=7 ), + 655: Model( display_name="Qwen2 72b", identifier="qwen2:72b", supports_tools=False, parameter_count_in_b=72 ), + 780: Model( display_name="Hermes3 8b", identifier="hermes3:8b", supports_tools=True, parameter_count_in_b=8 ), + 68: Model( display_name="Aya 8b", identifier="aya:8b", supports_tools=True, parameter_count_in_b=8 ), + 397: Model( display_name="Aya 35b", identifier="aya:35b", supports_tools=True, parameter_count_in_b=35 ), } diff --git a/suite_settings/seeds.py b/suite_settings/seeds.py index 5d5734e..0631e71 100644 --- a/suite_settings/seeds.py +++ b/suite_settings/seeds.py @@ -13,5 +13,20 @@ seeds = [ 789654, 10293847, 42, - 911 + 911, + 7861757, + 4813633, + 8936529, + 9859082, + 5866811, + 6992667, + 3535409, + 6313453, + 1760684, + 4038474, + 1305734, + 524084, + 7676954, + 8992671, + 4290451 ] diff --git a/suite_settings/techniques.py b/suite_settings/techniques.py index b1f9e9d..4c10044 100644 --- a/suite_settings/techniques.py +++ b/suite_settings/techniques.py @@ -8,12 +8,12 @@ techniques = { ), 903: Technique( name="LSM", # Long System Message - for_supports_tools=False, + for_supports_tools=True, for_not_supports_tools=True, ), 572: Technique( name="T2S", # Tool to System Messsages - for_supports_tools=False, + for_supports_tools=True, for_not_supports_tools=True, ), }