forked from shreyansh-shankar/NodeBox
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodels_data.py
More file actions
1227 lines (1227 loc) · 46 KB
/
models_data.py
File metadata and controls
1227 lines (1227 loc) · 46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Static catalog of model metadata. Each entry is a dict with:
#   name        - model identifier string (e.g. "deepseek-r1")
#   icon        - relative path to a logo image under assets/models_logo/
#   description - short human-readable summary of the model
#   tags        - list of category/capability strings (may be empty)
#   sizes       - list of available parameter-size strings (may be empty)
# NOTE(review): presumably consumed elsewhere in the app for a model
# browser/picker — confirm against callers before changing the schema.
models = [
{
    "name": "deepseek-r1",
    "icon": "assets/models_logo/deepseek.png",
    "description": "DeepSeek-R1 is a family of open reasoning models with performance approaching that of leading models, such as O3 and Gemini 2.5 Pro.",
    # Fixed missing comma: "thinking" "tools" was implicit string
    # concatenation, silently yielding the single bogus tag
    # "thinkingtools" instead of the two intended tags.
    "tags": ["deepseek", "thinking", "tools"],
    "sizes": ["1.5b", "7b", "8b", "14b", "32b", "70b", "671b"],
},
{
"name": "gemma3n",
"icon": "assets/models_logo/gemma.png",
"description": "Gemma 3n models are designed for efficient execution on everyday devices such as laptops, tablets or phones. ",
"tags": ["gemma"],
"sizes": ["e2b", "e4b"],
},
{
"name": "gemma3",
"icon": "assets/models_logo/gemma.png",
"description": "The current, most capable model that runs on a single GPU.",
"tags": ["gemma", "vision"],
"sizes": ["1b", "4b", "12b", "27b"],
},
{
"name": "qwen3",
"icon": "assets/models_logo/qwen.png",
"description": "Qwen3 is the latest generation of large language models in Qwen series, offering a comprehensive suite of dense and mixture-of-experts (MoE) models.",
"tags": ["qwen", "tools", "thinking"],
"sizes": ["0.6b", "1.7b", "4b", "8b", "14b", "30b", "32b", "235b"],
},
{
"name": "qwen2.5vl",
"icon": "assets/models_logo/qwen.png",
"description": "Flagship vision-language model of Qwen and also a significant leap from the previous Qwen2-VL.",
"tags": ["qwen", "vision"],
"sizes": ["3b", "7b", "32b", "72b"],
},
{
"name": "llama3.1",
"icon": "assets/models_logo/llama.png",
"description": "Llama 3.1 is a new state-of-the-art model from Meta available in 8B, 70B and 405B parameter sizes.",
"tags": ["llama", "tools"],
"sizes": ["8b", "70b", "405b"],
},
{
"name": "nomic-embed-text",
"icon": "assets/models_logo/nomic.png",
"description": "A high-performing open embedding model with a large token context window.",
"tags": ["embedding"],
"sizes": [],
},
{
"name": "llama3.2",
"icon": "assets/models_logo/llama.png",
"description": "Meta's Llama 3.2 goes small with 1B and 3B models.",
"tags": ["tools"],
"sizes": ["1b", "3b"],
},
{
"name": "mistral",
"icon": "assets/models_logo/mistral.png",
"description": "The 7B model released by Mistral AI, updated to version 0.3.",
"tags": ["tools"],
"sizes": ["7b"],
},
{
"name": "qwen2.5",
"icon": "assets/models_logo/qwen.png",
"description": "Qwen2.5 models are pretrained on Alibaba's latest large-scale dataset, encompassing up to 18 trillion tokens. The model supports up to 128K tokens and has multilingual support.",
"tags": ["tools"],
"sizes": ["0.5b", "1.5b", "3b", "7b", "14b", "32b", "72b"],
},
{
"name": "llama3",
"icon": "assets/models_logo/llama.png",
"description": "Meta Llama 3: The most capable openly available LLM to date.",
"tags": [],
"sizes": ["8b", "70b"],
},
{
"name": "llava",
"icon": "assets/models_logo/llava.png",
"description": "LLaVA is a novel end-to-end trained large multimodal model that combines a vision encoder and Vicuna for general-purpose visual and language understanding.",
"tags": ["vision"],
"sizes": ["7b", "13b", "34b"],
},
{
"name": "phi3",
"icon": "assets/models_logo/phi.png",
"description": "Phi-3 is a family of lightweight 3B (Mini) and 14B (Medium) state-of-the-art open models by Microsoft.",
"tags": ["phi"],
"sizes": ["3.8b", "14b"],
},
{
"name": "gemma2",
"icon": "assets/models_logo/gemma.png",
"description": "Google Gemma 2 is a high-performing and efficient model available in three sizes: 2B, 9B, and 27B.",
"tags": ["gemma"],
"sizes": ["2b", "9b", "27b"],
},
{
"name": "qwen2.5-coder",
"icon": "assets/models_logo/qwen.png",
"description": "The latest series of Code-Specific Qwen models, with significant improvements in code generation, code reasoning, and code fixing.",
"tags": ["qwen", "tools"],
"sizes": ["0.5b", "1.5b", "3b", "7b", "14b", "32b"],
},
{
"name": "gemma",
"icon": "assets/models_logo/gemma.png",
"description": "Gemma is a family of lightweight, state-of-the-art open models built by Google DeepMind. Updated to version 1.1.",
"tags": ["gemma"],
"sizes": ["2b", "7b"],
},
{
"name": "qwen",
"icon": "assets/models_logo/qwen.png",
"description": "Qwen 1.5 is a series of large language models by Alibaba Cloud spanning from 0.5B to 110B parameters.",
"tags": ["qwen"],
"sizes": ["0.5b", "1.8b", "4b", "7b", "14b", "32b", "72b", "110b"],
},
{
"name": "mxbai-embed-large",
"icon": "assets/models_logo/mxbai.png",
"description": "State-of-the-art large embedding model from mixedbread.ai.",
"tags": ["mxbai", "embedding"],
"sizes": ["335m"],
},
{
"name": "qwen2",
"icon": "assets/models_logo/qwen.png",
"description": "Qwen2 is a new series of large language models from Alibaba group.",
"tags": ["qwen", "tools"],
"sizes": ["0.5b", "1.5b", "7b", "72b"],
},
{
"name": "llama2",
"icon": "assets/models_logo/llama.png",
"description": "Llama 2 is a collection of foundation language models ranging from 7B to 70B parameters.",
"tags": ["llama"],
"sizes": ["7b", "13b", "70b"],
},
{
"name": "phi4",
"icon": "assets/models_logo/phi.png",
"description": "Phi-4 is a 14B parameter, state-of-the-art open model from Microsoft.",
"tags": ["phi"],
"sizes": ["14b"],
},
{
"name": "minicpm-v",
"icon": "assets/models_logo/minicpm.png",
"description": "A series of multimodal LLMs (MLLMs) designed for vision-language understanding.",
"tags": ["minicpm", "vision"],
"sizes": ["8b"],
},
{
"name": "codellama",
"icon": "assets/models_logo/llama.png",
"description": "A large language model that can use text prompts to generate and discuss code.",
"tags": ["llama"],
"sizes": ["7b", "13b", "34b", "70b"],
},
{
"name": "tinyllama",
"icon": "assets/models_logo/llama.png",
"description": "The TinyLlama project is an open endeavor to train a compact 1.1B Llama model on 3 trillion tokens.",
"tags": ["llama"],
"sizes": ["1.1b"],
},
{
"name": "llama3.3",
"icon": "assets/models_logo/llama.png",
"description": "New state of the art 70B model. Llama 3.3 70B offers similar performance compared to the Llama 3.1 405B model.",
"tags": ["llama", "tools"],
"sizes": ["70b"],
},
{
"name": "llama3.2-vision",
"icon": "assets/models_logo/llama.png",
"description": "Llama 3.2 Vision is a collection of instruction-tuned image reasoning generative models in 11B and 90B sizes.",
"tags": ["llama", "vision"],
"sizes": ["11b", "90b"],
},
{
"name": "dolphin3",
"icon": "assets/models_logo/dolphin.png",
"description": "Dolphin 3.0 Llama 3.1 8B 🐬 is the next generation of the Dolphin series for general purpose use, including coding and agentic reasoning.",
"tags": ["dolphin"],
"sizes": ["8b"],
},
{
"name": "mistral-nemo",
"icon": "assets/models_logo/mistral.png",
"description": "A state-of-the-art 12B model with 128k context length, built by Mistral AI in collaboration with NVIDIA.",
"tags": ["mistral", "tools"],
"sizes": ["12b"],
},
{
"name": "olmo2",
"icon": "assets/models_logo/olmo.png",
"description": "OLMo 2 is a family of 7B and 13B models trained on up to 5T tokens, competitive with Llama 3.1.",
"tags": ["olmo"],
"sizes": ["7b", "13b"],
},
{
"name": "deepseek-v3",
"icon": "assets/models_logo/deepseek.png",
"description": "A strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token.",
"tags": ["deepseek"],
"sizes": ["671b"],
},
{
"name": "bge-m3",
"icon": "assets/models_logo/bge.png",
"description": "BGE-M3 is a new model from BAAI for Multi-Functionality, Multi-Linguality, and Multi-Granularity.",
"tags": ["bge", "embedding"],
"sizes": ["567m"],
},
{
"name": "qwq",
"icon": "assets/models_logo/qwq.png",
"description": "QwQ is the reasoning model of the Qwen series.",
"tags": ["qwq", "tools"],
"sizes": ["32b"],
},
{
"name": "mistral-small",
"icon": "assets/models_logo/mistral.png",
"description": "Mistral Small 3 sets a new benchmark in the 'small' LLM category below 70B.",
"tags": ["mistral", "tools"],
"sizes": ["22b", "24b"],
},
{
"name": "llava-llama3",
"icon": "assets/models_logo/llava.png",
"description": "A LLaVA model fine-tuned from Llama 3 Instruct with better benchmark scores.",
"tags": ["llava", "vision"],
"sizes": ["8b"],
},
{
"name": "smollm2",
"icon": "assets/models_logo/smoll.png",
"description": "SmolLM2 is a family of compact language models in 135M, 360M, and 1.7B sizes.",
"tags": ["smoll", "tools"],
"sizes": ["135m", "360m", "1.7b"],
},
{
"name": "llama2-uncensored",
"icon": "assets/models_logo/llama.png",
"description": "Uncensored Llama 2 model by George Sung and Jarrad Hope.",
"tags": ["llama"],
"sizes": ["7b", "70b"],
},
{
"name": "mixtral",
"icon": "assets/models_logo/mistral.png",
"description": "A set of Mixture of Experts models with open weights from Mistral AI.",
"tags": ["mistral", "tools"],
"sizes": ["8x7b", "8x22b"],
},
{
"name": "starcoder2",
"icon": "assets/models_logo/starcoder.png",
"description": "StarCoder2 is the next generation of transparently trained open code LLMs.",
"tags": ["star"],
"sizes": ["3b", "7b", "15b"],
},
{
"name": "deepseek-coder-v2",
"icon": "assets/models_logo/deepseek.png",
"description": "An open-source MoE code model achieving GPT-4 Turbo-level performance on code tasks.",
"tags": ["deepseek"],
"sizes": ["16b", "236b"],
},
{
"name": "all-minilm",
"icon": "assets/models_logo/phi.png",
"description": "Embedding models trained on very large sentence-level datasets.",
"tags": ["minilm", "embedding"],
"sizes": ["22m", "33m"],
},
{
"name": "deepseek-coder",
"icon": "assets/models_logo/deepseek.png",
"description": "DeepSeek Coder is a capable coding model trained on two trillion code and natural language tokens.",
"tags": ["deepseek"],
"sizes": ["1.3b", "6.7b", "33b"],
},
{
"name": "snowflake-arctic-embed",
"icon": "assets/models_logo/snowflake.png",
"description": "Text embedding models by Snowflake, optimized for performance.",
"tags": ["snowflake", "embedding"],
"sizes": ["22m", "33m", "110m", "137m", "335m"],
},
{
"name": "codegemma",
"icon": "assets/models_logo/gemma.png",
"description": "Powerful, lightweight models that perform a variety of coding tasks including reasoning and instruction following.",
"tags": ["gemma"],
"sizes": ["2b", "7b"],
},
{
"name": "phi",
"icon": "assets/models_logo/phi.png",
"description": "Phi-2: a 2.7B language model by Microsoft Research with outstanding reasoning capabilities.",
"tags": ["phi"],
"sizes": ["2.7b"],
},
{
"name": "dolphin-mixtral",
"icon": "assets/models_logo/dolphin.png",
"description": "Uncensored, 8x7b and 8x22b fine-tuned models based on Mixtral by Eric Hartford.",
"tags": ["dolphin"],
"sizes": ["8x7b", "8x22b"],
},
{
"name": "openthinker",
"icon": "assets/models_logo/openthinker.png",
"description": "An open-source reasoning model family derived from DeepSeek-R1.",
"tags": ["openthinker"],
"sizes": ["7b", "32b"],
},
{
"name": "llama4",
"icon": "assets/models_logo/llama.png",
"description": "Meta's latest collection of multimodal models.",
"tags": ["llama", "vision", "tools"],
"sizes": ["16x17b", "128x17b"],
},
{
"name": "orca-mini",
"icon": "assets/models_logo/phi.png",
"description": "A general-purpose model for entry-level hardware ranging from 3B to 70B.",
"tags": [],
"sizes": ["3b", "7b", "13b", "70b"],
},
{
"name": "wizardlm2",
"icon": "assets/models_logo/wizardlm.png",
"description": "Microsoft's LLM with improved multilingual, reasoning and agent use case performance.",
"tags": ["wizardlm"],
"sizes": ["7b", "8x22b"],
},
{
"name": "smollm",
"icon": "assets/models_logo/smoll.png",
"description": "A family of small models trained on a new high-quality dataset.",
"tags": ["smoll"],
"sizes": ["135m", "360m", "1.7b"],
},
{
"name": "dolphin-mistral",
"icon": "assets/models_logo/dolphin.png",
"description": "Uncensored Dolphin model based on Mistral for coding tasks. Version 2.8.",
"tags": ["dolphin"],
"sizes": ["7b"],
},
{
"name": "codestral",
"icon": "assets/models_logo/mistral.png",
"description": "Mistral AI’s first code model designed for generation tasks.",
"tags": ["mistral"],
"sizes": ["22b"],
},
{
"name": "dolphin-llama3",
"icon": "assets/models_logo/dolphin.png",
"description": "Dolphin 2.9 based on Llama 3 for coding and instruction, available in 8B and 70B sizes.",
"tags": ["dolphin"],
"sizes": ["8b", "70b"],
},
{
"name": "command-r",
"icon": "assets/models_logo/command.png",
"description": "Command R is a LLM optimized for conversational and long-context tasks.",
"tags": ["command", "tools"],
"sizes": ["35b"],
},
{
"name": "hermes3",
"icon": "assets/models_logo/hermes.png",
"description": "Hermes 3 is the latest LLM from Nous Research with improved reasoning and agentic skills.",
"tags": ["hermes", "tools"],
"sizes": ["3b", "8b", "70b", "405b"],
},
{
"name": "phi3.5",
"icon": "assets/models_logo/phi.png",
"description": "Phi-3.5: A 3.8B model outperforming similarly and larger-sized models.",
"tags": ["phi"],
"sizes": ["3.8b"],
},
{
"name": "yi",
"icon": "assets/models_logo/yi.png",
"description": "Yi 1.5 is a bilingual high-performing LLM.",
"tags": ["yi"],
"sizes": ["6b", "9b", "34b"],
},
{
"name": "zephyr",
"icon": "assets/models_logo/zephyr.jpeg",
"description": "Zephyr is a series of fine-tuned Mistral and Mixtral models trained as helpful assistants.",
"tags": ["zephyr"],
"sizes": ["7b", "141b"],
},
{
"name": "granite3.3",
"icon": "assets/models_logo/granite.png",
"description": "IBM Granite 2B and 8B models with 128K context, fine-tuned for reasoning and instruction-following.",
"tags": ["tools"],
"sizes": ["2b", "8b"],
},
{
"name": "phi4-mini",
"icon": "assets/models_logo/phi.png",
"description": "Phi-4-mini enhances multilingual support, reasoning, math, and adds function calling.",
"tags": ["tools"],
"sizes": ["3.8b"],
},
{
"name": "moondream",
"icon": "assets/models_logo/moondream.png",
"description": "Moondream2 is a small vision-language model optimized for edge devices.",
"tags": ["vision"],
"sizes": ["1.8b"],
},
{
"name": "granite-code",
"icon": "assets/models_logo/granite.png",
"description": "A family of open foundation models by IBM for code intelligence.",
"tags": [],
"sizes": ["3b", "8b", "20b", "34b"],
},
{
"name": "wizard-vicuna-uncensored",
"icon": "assets/models_logo/wizardlm.png",
"description": "Uncensored versions of Llama 2 by Eric Hartford, with 7B, 13B, and 30B parameters.",
"tags": [],
"sizes": ["7b", "13b", "30b"],
},
{
"name": "devstral",
"icon": "assets/models_logo/mistral.png",
"description": "Devstral: the best open source model for coding agents.",
"tags": ["tools"],
"sizes": ["24b"],
},
{
"name": "magistral",
"icon": "assets/models_logo/mistral.png",
"description": "Small, efficient reasoning model with 24B parameters.",
"tags": ["tools", "thinking"],
"sizes": ["24b"],
},
{
"name": "starcoder",
"icon": "assets/models_logo/starcoder.png",
"description": "StarCoder is a code generation model trained on 80+ programming languages.",
"tags": [],
"sizes": ["1b", "3b", "7b", "15b"],
},
{
"name": "phi4-reasoning",
"icon": "assets/models_logo/phi.png",
"description": "Phi 4 reasoning models excel at complex tasks with 14B parameters.",
"tags": [],
"sizes": ["14b"],
},
{
"name": "mistral-small3.1",
"icon": "assets/models_logo/mistral.png",
"description": "Mistral Small 3.1 enhances vision understanding and long-context capabilities.",
"tags": ["vision", "tools"],
"sizes": ["24b"],
},
{
"name": "vicuna",
"icon": "assets/models_logo/vicuna.jpeg",
"description": "General use chat model based on Llama and Llama 2 with 2K to 16K context sizes.",
"tags": [],
"sizes": ["7b", "13b", "33b"],
},
{
"name": "cogito",
"icon": "assets/models_logo/cogito.png",
"description": "Cogito v1 Preview is a hybrid reasoning model that outperforms top open models across most benchmarks.",
"tags": ["tools"],
"sizes": ["3b", "8b", "14b", "32b", "70b"],
},
{
"name": "deepcoder",
"icon": "assets/models_logo/deepcoder.png",
"description": "DeepCoder is a fully open-source coder model, available in 1.5B and 14B sizes.",
"tags": ["code"],
"sizes": ["1.5b", "14b"],
},
{
"name": "openchat",
"icon": "assets/models_logo/openchat.png",
"description": "OpenChat models outperform ChatGPT in benchmarks and support general-purpose chat capabilities.",
"tags": [],
"sizes": ["7b"],
},
{
"name": "mistral-openorca",
"icon": "assets/models_logo/phi.png",
"description": "A 7B model fine-tuned with OpenOrca dataset on top of Mistral for improved instruction following.",
"tags": [],
"sizes": ["7b"],
},
{
"name": "codegeex4",
"icon": "assets/models_logo/codegeex.png",
"description": "Versatile model for AI-assisted software development and code completion.",
"tags": ["code"],
"sizes": ["9b"],
},
{
"name": "deepseek-llm",
"icon": "assets/models_logo/deepseek.png",
"description": "An advanced bilingual model trained on 2 trillion tokens for general language tasks.",
"tags": [],
"sizes": ["7b", "67b"],
},
{
"name": "deepseek-v2",
"icon": "assets/models_logo/deepseek.png",
"description": "A strong and efficient Mixture-of-Experts language model by DeepSeek.",
"tags": [],
"sizes": ["16b", "236b"],
},
{
"name": "openhermes",
"icon": "assets/models_logo/hermes.png",
"description": "OpenHermes 2.5 is a 7B model trained on open datasets, designed for chat and reasoning tasks.",
"tags": [],
"sizes": ["7b"],
},
{
"name": "granite3.2-vision",
"icon": "assets/models_logo/granite.png",
"description": "Compact vision-language model designed for visual document understanding.",
"tags": ["vision", "tools"],
"sizes": ["2b"],
},
{
"name": "codeqwen",
"icon": "assets/models_logo/qwen.png",
"description": "CodeQwen1.5 is pretrained on extensive code datasets for software generation.",
"tags": ["code"],
"sizes": ["7b"],
},
{
"name": "mistral-large",
"icon": "assets/models_logo/mistral.png",
"description": "Mistral's flagship 123B model with strong performance in code, math, and reasoning.",
"tags": ["tools"],
"sizes": ["123b"],
},
{
"name": "llama2-chinese",
"icon": "assets/models_logo/llama.png",
"description": "Llama 2 model fine-tuned to enhance Chinese dialogue performance.",
"tags": [],
"sizes": ["7b", "13b"],
},
{
"name": "aya",
"icon": "assets/models_logo/command.png",
"description": "Aya is a multilingual model supporting 23 languages with competitive performance.",
"tags": [],
"sizes": ["8b", "35b"],
},
{
"name": "tinydolphin",
"icon": "assets/models_logo/dolphin.png",
"description": "An experimental 1.1B model based on TinyLlama and trained on Dolphin 2.8 data.",
"tags": [],
"sizes": ["1.1b"],
},
{
"name": "qwen2-math",
"icon": "assets/models_logo/qwen.png",
"description": "Specialized math model built on Qwen2 with superior mathematical reasoning.",
"tags": [],
"sizes": ["1.5b", "7b", "72b"],
},
{
"name": "glm4",
"icon": "assets/models_logo/codegeex.png",
"description": "GLM4 is a strong multilingual LLM with performance competitive with Llama 3.",
"tags": [],
"sizes": ["9b"],
},
{
"name": "stable-code",
"icon": "assets/models_logo/stablecode.png",
"description": "Stable Code 3B is a compact model for instruction and code completion.",
"tags": ["code"],
"sizes": ["3b"],
},
{
"name": "nous-hermes2",
"icon": "assets/models_logo/hermes.png",
"description": "Family of models by Nous Research tailored for science and code.",
"tags": [],
"sizes": ["10.7b", "34b"],
},
{
"name": "wizardcoder",
"icon": "assets/models_logo/wizardlm.png",
"description": "High-performance model for code generation tasks.",
"tags": ["code"],
"sizes": ["33b"],
},
{
"name": "command-r-plus",
"icon": "assets/models_logo/command.png",
"description": "Scalable enterprise-grade LLM optimized for real-world use cases.",
"tags": ["tools"],
"sizes": ["104b"],
},
{
"name": "bakllava",
"icon": "assets/models_logo/llava.png",
"description": "Multimodal model based on Mistral 7B with the LLaVA architecture.",
"tags": ["vision"],
"sizes": ["7b"],
},
{
"name": "neural-chat",
"icon": "assets/models_logo/intel.png",
"description": "Fine-tuned Mistral model with strong domain and language coverage.",
"tags": [],
"sizes": ["7b"],
},
{
"name": "granite3.2",
"icon": "assets/models_logo/granite.png",
"description": "Long-context AI models fine-tuned for thinking and reasoning capabilities.",
"tags": ["tools"],
"sizes": ["2b", "8b"],
},
{
"name": "stablelm2",
"icon": "assets/models_logo/stablecode.png",
"description": "StableLM 2 models trained on multilingual data with high performance.",
"tags": [],
"sizes": ["1.6b", "12b"],
},
{
"name": "bge-large",
"icon": "assets/models_logo/bge.png",
"description": "BAAI’s large embedding model mapping texts to semantic vector space.",
"tags": ["embedding"],
"sizes": ["335m"],
},
{
"name": "sqlcoder",
"icon": "assets/models_logo/sqlcoder.png",
"description": "SQLCoder is a specialized model for generating SQL from natural language.",
"tags": ["code"],
"sizes": ["7b", "15b"],
},
{
"name": "llama3-chatqa",
"icon": "assets/models_logo/llama.png",
"description": "A model from NVIDIA based on Llama 3 that excels at conversational question answering (QA) and retrieval-augmented generation (RAG).",
"tags": ["llama3", "qa", "rag"],
"sizes": ["8b", "70b"],
},
{
"name": "snowflake-arctic-embed2",
"icon": "assets/models_logo/snowflake.png",
"description": "Snowflake's frontier embedding model. Arctic Embed 2.0 adds multilingual support without sacrificing English performance or scalability.",
"tags": ["embedding"],
"sizes": ["568m"],
},
{
"name": "reflection",
"icon": "assets/models_logo/reflection.png",
"description": "A high-performing model trained with a new technique called Reflection-tuning that teaches a LLM to detect mistakes in its reasoning and correct course.",
"tags": [],
"sizes": ["70b"],
},
{
"name": "wizard-math",
"icon": "assets/models_logo/wizardlm.png",
"description": "Model focused on math and logic problems.",
"tags": ["math"],
"sizes": ["7b", "13b", "70b"],
},
{
"name": "llava-phi3",
"icon": "assets/models_logo/phi.png",
"description": "A new small LLaVA model fine-tuned from Phi 3 Mini.",
"tags": ["vision"],
"sizes": ["3.8b"],
},
{
"name": "granite3.1-dense",
"icon": "assets/models_logo/granite.png",
"description": "The IBM Granite 2B and 8B models are text-only dense LLMs trained on over 12 trillion tokens of data, demonstrating significant improvements over predecessors.",
"tags": ["tools"],
"sizes": ["2b", "8b"],
},
{
"name": "granite3-dense",
"icon": "assets/models_logo/granite.png",
"description": "IBM Granite 2B and 8B models designed for RAG and code generation tasks.",
"tags": ["tools"],
"sizes": ["2b", "8b"],
},
{
"name": "llama3-gradient",
"icon": "assets/models_logo/llama.png",
"description": "This model extends LLama-3 8B's context length from 8k to over 1M tokens.",
"tags": [],
"sizes": ["8b", "70b"],
},
{
"name": "nous-hermes",
"icon": "assets/models_logo/hermes.png",
"description": "General use models based on Llama and Llama 2 from Nous Research.",
"tags": ["nous"],
"sizes": ["7b", "13b"],
},
{
"name": "dbrx",
"icon": "assets/models_logo/ailogo.jpg",
"description": "DBRX is an open, general-purpose LLM created by Databricks.",
"tags": [],
"sizes": ["132b"],
},
{
"name": "exaone3.5",
"icon": "assets/models_logo/ailogo.jpg",
"description": "EXAONE 3.5 is a collection of instruction-tuned bilingual models ranging from 2.4B to 32B by LG AI Research.",
"tags": ["bilingual"],
"sizes": ["2.4b", "7.8b", "32b"],
},
{
"name": "samantha-mistral",
"icon": "assets/models_logo/mistral.png",
"description": "A companion assistant trained in philosophy, psychology, and personal relationships. Based on Mistral.",
"tags": [],
"sizes": ["7b"],
},
{
"name": "yi-coder",
"icon": "assets/models_logo/yi.png",
"description": "Yi-Coder is a series of open-source code language models delivering SOTA performance under 10B params.",
"tags": ["coder"],
"sizes": ["1.5b", "9b"],
},
{
"name": "dolphincoder",
"icon": "assets/models_logo/dolphin.png",
"description": "Uncensored 7B and 15B variant of the Dolphin model family, excelling at code tasks.",
"tags": ["coding"],
"sizes": ["7b", "15b"],
},
{
"name": "nemotron-mini",
"icon": "assets/models_logo/ailogo.jpg",
"description": "Commercial-friendly small LLM by NVIDIA optimized for roleplay, RAG QA, and function calling.",
"tags": ["tools"],
"sizes": ["4b"],
},
{
"name": "starling-lm",
"icon": "assets/models_logo/ailogo.jpg",
"description": "A large language model trained by reinforcement learning from AI feedback focused on chatbot helpfulness.",
"tags": [],
"sizes": ["7b"],
},
{
"name": "falcon",
"icon": "assets/models_logo/ailogo.jpg",
"description": "A model built by TII for summarization, text generation, and chat.",
"tags": [],
"sizes": ["7b", "40b", "180b"],
},
{
"name": "phind-codellama",
"icon": "assets/models_logo/llama.png",
"description": "Code generation model based on Code Llama.",
"tags": ["coding"],
"sizes": ["34b"],
},
{
"name": "solar",
"icon": "assets/models_logo/ailogo.jpg",
"description": "A compact, yet powerful 10.7B LLM designed for single-turn conversation.",
"tags": [],
"sizes": ["10.7b"],
},
{
"name": "xwinlm",
"icon": "assets/models_logo/ailogo.jpg",
"description": "Conversational model based on Llama 2 performing competitively on various benchmarks.",
"tags": [],
"sizes": ["7b", "13b"],
},
{
"name": "internlm2",
"icon": "assets/models_logo/ailogo.jpg",
"description": "InternLM2.5 is a 7B parameter model tailored for practical scenarios with outstanding reasoning capability.",
"tags": [],
"sizes": ["1m", "1.8b", "7b", "20b"],
},
{
"name": "deepscaler",
"icon": "assets/models_logo/deepseek.png",
"description": "A fine-tuned version of Deepseek-R1-Distilled-Qwen-1.5B that surpasses the performance of OpenAI’s o1-preview with just 1.5B parameters on popular math evaluations.",
"tags": [],
"sizes": ["1.5b"],
},
{
"name": "athene-v2",
"icon": "assets/models_logo/ailogo.jpg",
"description": "Athene-V2 is a 72B parameter model which excels at code completion, mathematics, and log extraction tasks.",
"tags": ["tools"],
"sizes": ["72b"],
},
{
"name": "nemotron",
"icon": "assets/models_logo/ailogo.jpg",
"description": "Llama-3.1-Nemotron-70B-Instruct is a large language model customized by NVIDIA to improve the helpfulness of LLM generated responses to user queries.",
"tags": ["tools"],
"sizes": ["70b"],
},
{
"name": "yarn-llama2",
"icon": "assets/models_logo/llama.png",
"description": "An extension of Llama 2 that supports a context of up to 128k tokens.",
"tags": [],
"sizes": ["7b", "13b"],
},
{
"name": "opencoder",
"icon": "assets/models_logo/ailogo.jpg",
"description": "OpenCoder is an open and reproducible code LLM family which includes 1.5B and 8B models, supporting chat in English and Chinese languages.",
"tags": [],
"sizes": ["1.5b", "8b"],
},
{
"name": "dolphin-phi",
"icon": "assets/models_logo/dolphin.png",
"description": "2.7B uncensored Dolphin model by Eric Hartford, based on the Phi language model by Microsoft Research.",
"tags": [],
"sizes": ["2.7b"],
},
{
"name": "llama3-groq-tool-use",
"icon": "assets/models_logo/llama.png",
"description": "A series of models from Groq that represent a significant advancement in open-source AI capabilities for tool use/function calling.",
"tags": ["tools"],
"sizes": ["8b", "70b"],
},
{
"name": "exaone-deep",
"icon": "assets/models_logo/ailogo.jpg",
"description": "EXAONE Deep exhibits superior capabilities in various reasoning tasks including math and coding benchmarks, ranging from 2.4B to 32B parameters developed and released by LG AI Research.",
"tags": [],
"sizes": ["2.4b", "7.8b", "32b"],
},
{
"name": "wizardlm",
"icon": "assets/models_logo/wizardlm.png",
"description": "General use model based on Llama 2.",
"tags": [],
"sizes": [],
},
{
"name": "paraphrase-multilingual",
"icon": "assets/models_logo/ailogo.jpg",
"description": "Sentence-transformers model that can be used for tasks like clustering or semantic search.",
"tags": ["embedding"],
"sizes": ["278m"],
},
{
"name": "wizardlm-uncensored",
"icon": "assets/models_logo/wizardlm.png",
"description": "Uncensored version of Wizard LM model.",
"tags": [],
"sizes": ["13b"],
},
{
"name": "aya-expanse",
"icon": "assets/models_logo/ailogo.jpg",
"description": "Cohere For AI's language models trained to perform well across 23 different languages.",
"tags": ["tools"],
"sizes": ["8b", "32b"],
},
{
"name": "orca2",
"icon": "assets/models_logo/ailogo.jpg",
"description": "Orca 2 is built by Microsoft research, and are a fine-tuned version of Meta's Llama 2 models. The model is designed to excel particularly in reasoning.",
"tags": [],
"sizes": ["7b", "13b"],
},
{
"name": "smallthinker",
"icon": "assets/models_logo/ailogo.jpg",
"description": "A new small reasoning model fine-tuned from the Qwen 2.5 3B Instruct model.",
"tags": [],
"sizes": ["3b"],
},
{
"name": "falcon3",
"icon": "assets/models_logo/ailogo.jpg",
"description": "A family of efficient AI models under 10B parameters performant in science, math, and coding through innovative training techniques.",
"tags": [],
"sizes": ["1b", "3b", "7b", "10b"],
},
{
"name": "llama-guard3",
"icon": "assets/models_logo/llama.png",
"description": "Llama Guard 3 is a series of models fine-tuned for content safety classification of LLM inputs and responses.",
"tags": [],
"sizes": ["1b", "8b"],
},
{
"name": "granite-embedding",
"icon": "assets/models_logo/granite.png",
"description": "The IBM Granite Embedding 30M and 278M models are text-only dense biencoder embedding models, with 30M available in English only and 278M serving multilingual use cases.",
"tags": ["embedding"],
"sizes": ["30m", "278m"],
},
{
"name": "medllama2",
"icon": "assets/models_logo/llama.png",
"description": "Fine-tuned Llama 2 model to answer medical questions based on an open source medical dataset.",
"tags": [],
"sizes": ["7b"],
},
{
"name": "nous-hermes2-mixtral",
"icon": "assets/models_logo/hermes.png",
"description": "The Nous Hermes 2 model from Nous Research, now trained over Mixtral.",
"tags": [],
"sizes": ["8x7b"],
},
{
"name": "stable-beluga",
"icon": "assets/models_logo/ailogo.jpg",
"description": "Llama 2 based model fine tuned on an Orca-style dataset. Originally called Free Willy.",
"tags": [],
"sizes": ["7b", "13b", "70b"],
},
{
"name": "meditron",
"icon": "assets/models_logo/llama.png",
"description": "Open-source medical large language model adapted from Llama 2 to the medical domain.",
"tags": [],
"sizes": ["7b", "70b"],
},
{
"name": "granite3-moe",
"icon": "assets/models_logo/granite.png",
"description": "The IBM Granite 1B and 3B models are the first mixture of experts (MoE) Granite models from IBM designed for low latency usage.",
"tags": ["tools"],
"sizes": ["1b", "3b"],
},
{
"name": "r1-1776",
"icon": "assets/models_logo/deepseek.png",
"description": "A version of the DeepSeek-R1 model that has been post trained to provide unbiased, accurate, and factual information by Perplexity.",
"tags": [],
"sizes": ["70b", "671b"],
},
{
"name": "deepseek-v2.5",
"icon": "assets/models_logo/deepseek.png",
"description": "An upgraded version of DeepSeek-V2 that integrates the general and coding abilities of both DeepSeek-V2-Chat and DeepSeek-Coder-V2-Instruct.",
"tags": [],
"sizes": ["236b"],
},
{
"name": "granite3.1-moe",
"icon": "assets/models_logo/granite.png",
"description": "The IBM Granite 1B and 3B models are long-context mixture of experts (MoE) Granite models from IBM designed for low latency usage.",
"tags": ["tools"],
"sizes": ["1b", "3b"],
},
{
"name": "reader-lm",
"icon": "assets/models_logo/ailogo.jpg",
"description": "A series of models that convert HTML content to Markdown content, which is useful for content conversion tasks.",
"tags": [],
"sizes": ["0.5b", "1.5b"],
},
{
"name": "mistral-small3.2",
"icon": "assets/models_logo/mistral.png",
"description": "An update to Mistral Small that improves on function calling, instruction following, and less repetition errors.",
"tags": ["vision", "tools"],