mirror of
https://github.com/altstackHQ/altstack-data.git
synced 2026-04-17 19:53:12 +02:00
9193 lines
220 KiB
JSON
[
|
|
{
|
|
"slug": "qwen2.5-7b-instruct",
|
|
"name": "Qwen2.5 7B Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct",
|
|
"description": "Open source model Qwen/Qwen2.5-7B-Instruct. 1073 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 1073,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"chat",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2309.00071",
|
|
"arxiv:2407.10671",
|
|
"base_model:Qwen/Qwen2.5-7B",
|
|
"base_model:finetune:Qwen/Qwen2.5-7B",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 5,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 7,
|
|
"parameters_active_b": 7,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-0.6b",
|
|
"name": "Qwen3 0.6B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-0.6B",
|
|
"description": "Open source model Qwen/Qwen3-0.6B. 1083 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 1083,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3",
|
|
"conversational",
|
|
"arxiv:2505.09388",
|
|
"base_model:Qwen/Qwen3-0.6B-Base",
|
|
"base_model:finetune:Qwen/Qwen3-0.6B-Base",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 4,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0.6,
|
|
"parameters_active_b": 0.6,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "gpt2",
|
|
"name": "Gpt2",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/openai-community/gpt2",
|
|
"description": "Open source model openai-community/gpt2. 3114 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 3114,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"pytorch",
|
|
"tf",
|
|
"jax",
|
|
"tflite",
|
|
"rust",
|
|
"onnx",
|
|
"safetensors",
|
|
"gpt2",
|
|
"exbert",
|
|
"en",
|
|
"doi:10.57967/hf/0039",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen2.5-1.5b-instruct",
|
|
"name": "Qwen2.5 1.5B Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct",
|
|
"description": "Open source model Qwen/Qwen2.5-1.5B-Instruct. 617 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 617,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"chat",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2407.10671",
|
|
"base_model:Qwen/Qwen2.5-1.5B",
|
|
"base_model:finetune:Qwen/Qwen2.5-1.5B",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 4,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 1.5,
|
|
"parameters_active_b": 1.5,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen2.5-3b-instruct",
|
|
"name": "Qwen2.5 3B Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen2.5-3B-Instruct",
|
|
"description": "Open source model Qwen/Qwen2.5-3B-Instruct. 404 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 404,
|
|
"language": "Python",
|
|
"license": "other",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"chat",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2407.10671",
|
|
"base_model:Qwen/Qwen2.5-3B",
|
|
"base_model:finetune:Qwen/Qwen2.5-3B",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 2,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 3,
|
|
"parameters_active_b": 3,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "llama-3.1-8b-instruct",
|
|
"name": "Llama 3.1 8B Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct",
|
|
"description": "Open source model meta-llama/Llama-3.1-8B-Instruct. 5467 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 5467,
|
|
"language": "Python",
|
|
"license": "llama3.1",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llama",
|
|
"facebook",
|
|
"meta",
|
|
"pytorch",
|
|
"llama-3",
|
|
"conversational",
|
|
"en",
|
|
"de",
|
|
"fr",
|
|
"it",
|
|
"pt",
|
|
"hi",
|
|
"es",
|
|
"th",
|
|
"arxiv:2204.05149",
|
|
"base_model:meta-llama/Llama-3.1-8B",
|
|
"base_model:finetune:meta-llama/Llama-3.1-8B",
|
|
"eval-results",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 6,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 8,
|
|
"parameters_active_b": 8,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "gpt-oss-20b",
|
|
"name": "Gpt Oss 20B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/openai/gpt-oss-20b",
|
|
"description": "Open source model openai/gpt-oss-20b. 4378 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 4378,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"gpt_oss",
|
|
"vllm",
|
|
"conversational",
|
|
"arxiv:2508.10925",
|
|
"endpoints_compatible",
|
|
"8-bit",
|
|
"mxfp4",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "16GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 14,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 20,
|
|
"parameters_active_b": 20,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen2.5-0.5b-instruct",
|
|
"name": "Qwen2.5 0.5B Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct",
|
|
"description": "Open source model Qwen/Qwen2.5-0.5B-Instruct. 463 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 463,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"chat",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2407.10671",
|
|
"base_model:Qwen/Qwen2.5-0.5B",
|
|
"base_model:finetune:Qwen/Qwen2.5-0.5B",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 4,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0.5,
|
|
"parameters_active_b": 0.5,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-4b",
|
|
"name": "Qwen3 4B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-4B",
|
|
"description": "Open source model Qwen/Qwen3-4B. 552 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 552,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3",
|
|
"conversational",
|
|
"arxiv:2309.00071",
|
|
"arxiv:2505.09388",
|
|
"base_model:Qwen/Qwen3-4B-Base",
|
|
"base_model:finetune:Qwen/Qwen3-4B-Base",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 3,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 4,
|
|
"parameters_active_b": 4,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-8b",
|
|
"name": "Qwen3 8B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-8B",
|
|
"description": "Open source model Qwen/Qwen3-8B. 940 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 940,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3",
|
|
"conversational",
|
|
"arxiv:2309.00071",
|
|
"arxiv:2505.09388",
|
|
"base_model:Qwen/Qwen3-8B-Base",
|
|
"base_model:finetune:Qwen/Qwen3-8B-Base",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 6,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 8,
|
|
"parameters_active_b": 8,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen2.5-32b-instruct",
|
|
"name": "Qwen2.5 32B Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen2.5-32B-Instruct",
|
|
"description": "Open source model Qwen/Qwen2.5-32B-Instruct. 328 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 328,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"chat",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2309.00071",
|
|
"arxiv:2407.10671",
|
|
"base_model:Qwen/Qwen2.5-32B",
|
|
"base_model:finetune:Qwen/Qwen2.5-32B",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 22,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 32,
|
|
"parameters_active_b": 32,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "opt-125m",
|
|
"name": "Opt 125M",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/facebook/opt-125m",
|
|
"description": "Open source model facebook/opt-125m. 233 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 233,
|
|
"language": "Python",
|
|
"license": "other",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"pytorch",
|
|
"tf",
|
|
"jax",
|
|
"opt",
|
|
"en",
|
|
"arxiv:2205.01068",
|
|
"arxiv:2005.14165",
|
|
"text-generation-inference",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0.125,
|
|
"parameters_active_b": 0.125,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-1.7b",
|
|
"name": "Qwen3 1.7B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-1.7B",
|
|
"description": "Open source model Qwen/Qwen3-1.7B. 422 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 422,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3",
|
|
"conversational",
|
|
"arxiv:2505.09388",
|
|
"base_model:Qwen/Qwen3-1.7B-Base",
|
|
"base_model:finetune:Qwen/Qwen3-1.7B-Base",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 5,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 1.7,
|
|
"parameters_active_b": 1.7,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "tiny-qwen2forcausallm-2.5",
|
|
"name": "Tiny Qwen2Forcausallm 2.5",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
|
|
"description": "Open source model trl-internal-testing/tiny-Qwen2ForCausalLM-2.5. 3 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 3,
|
|
"language": "Python",
|
|
"license": "unknown",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"trl",
|
|
"conversational",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "dolphin-2.9.1-yi-1.5-34b",
|
|
"name": "Dolphin 2.9.1 Yi 1.5 34B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/dphn/dolphin-2.9.1-yi-1.5-34b",
|
|
"description": "Open source model dphn/dolphin-2.9.1-yi-1.5-34b. 54 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 54,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llama",
|
|
"generated_from_trainer",
|
|
"axolotl",
|
|
"conversational",
|
|
"dataset:cognitivecomputations/Dolphin-2.9",
|
|
"dataset:teknium/OpenHermes-2.5",
|
|
"dataset:m-a-p/CodeFeedback-Filtered-Instruction",
|
|
"dataset:cognitivecomputations/dolphin-coder",
|
|
"dataset:cognitivecomputations/samantha-data",
|
|
"dataset:microsoft/orca-math-word-problems-200k",
|
|
"dataset:Locutusque/function-calling-chatml",
|
|
"dataset:internlm/Agent-FLAN",
|
|
"base_model:01-ai/Yi-1.5-34B",
|
|
"base_model:finetune:01-ai/Yi-1.5-34B",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 24,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 34,
|
|
"parameters_active_b": 34,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-embedding-0.6b",
|
|
"name": "Qwen3 Embedding 0.6B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-Embedding-0.6B",
|
|
"description": "Open source model Qwen/Qwen3-Embedding-0.6B. 879 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 879,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"sentence-transformers",
|
|
"safetensors",
|
|
"qwen3",
|
|
"transformers",
|
|
"sentence-similarity",
|
|
"feature-extraction",
|
|
"text-embeddings-inference",
|
|
"arxiv:2506.05176",
|
|
"base_model:Qwen/Qwen3-0.6B-Base",
|
|
"base_model:finetune:Qwen/Qwen3-0.6B-Base",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 4,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0.6,
|
|
"parameters_active_b": 0.6,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "gpt-oss-120b",
|
|
"name": "Gpt Oss 120B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"description": "Open source model openai/gpt-oss-120b. 4503 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 4503,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"gpt_oss",
|
|
"vllm",
|
|
"conversational",
|
|
"arxiv:2508.10925",
|
|
"endpoints_compatible",
|
|
"8-bit",
|
|
"mxfp4",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 84,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 120,
|
|
"parameters_active_b": 120,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-4b-instruct-2507",
|
|
"name": "Qwen3 4B Instruct 2507",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507",
|
|
"description": "Open source model Qwen/Qwen3-4B-Instruct-2507. 730 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 730,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3",
|
|
"conversational",
|
|
"arxiv:2505.09388",
|
|
"eval-results",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 3,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 4,
|
|
"parameters_active_b": 4,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "moondream2",
|
|
"name": "Moondream2",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/vikhyatk/moondream2",
|
|
"description": "Open source model vikhyatk/moondream2. 1373 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 1373,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"moondream1",
|
|
"image-text-to-text",
|
|
"custom_code",
|
|
"doi:10.57967/hf/6762",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": true
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "llama-3.2-1b-instruct",
|
|
"name": "Llama 3.2 1B Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct",
|
|
"description": "Open source model meta-llama/Llama-3.2-1B-Instruct. 1292 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 1292,
|
|
"language": "Python",
|
|
"license": "llama3.2",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llama",
|
|
"facebook",
|
|
"meta",
|
|
"pytorch",
|
|
"llama-3",
|
|
"conversational",
|
|
"en",
|
|
"de",
|
|
"fr",
|
|
"it",
|
|
"pt",
|
|
"hi",
|
|
"es",
|
|
"th",
|
|
"arxiv:2204.05149",
|
|
"arxiv:2405.16406",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 1,
|
|
"parameters_active_b": 1,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen2-1.5b-instruct",
|
|
"name": "Qwen2 1.5B Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen2-1.5B-Instruct",
|
|
"description": "Open source model Qwen/Qwen2-1.5B-Instruct. 158 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 158,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"chat",
|
|
"conversational",
|
|
"en",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 4,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 1.5,
|
|
"parameters_active_b": 1.5,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen2.5-coder-0.5b-instruct",
|
|
"name": "Qwen2.5 Coder 0.5B Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B-Instruct",
|
|
"description": "Open source model Qwen/Qwen2.5-Coder-0.5B-Instruct. 64 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 64,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"code",
|
|
"codeqwen",
|
|
"chat",
|
|
"qwen",
|
|
"qwen-coder",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2409.12186",
|
|
"arxiv:2407.10671",
|
|
"base_model:Qwen/Qwen2.5-Coder-0.5B",
|
|
"base_model:finetune:Qwen/Qwen2.5-Coder-0.5B",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 4,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0.5,
|
|
"parameters_active_b": 0.5,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "kimi-k2.5",
|
|
"name": "Kimi K2.5",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/mlx-community/Kimi-K2.5",
|
|
"description": "Open source model mlx-community/Kimi-K2.5. 28 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 28,
|
|
"language": "Python",
|
|
"license": "other",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"mlx",
|
|
"safetensors",
|
|
"kimi_k25",
|
|
"conversational",
|
|
"custom_code",
|
|
"base_model:moonshotai/Kimi-K2.5",
|
|
"base_model:quantized:moonshotai/Kimi-K2.5",
|
|
"4-bit",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "mistral-7b-instruct-v0.2",
|
|
"name": "Mistral 7B Instruct V0.2",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2",
|
|
"description": "Open source model mistralai/Mistral-7B-Instruct-v0.2. 3075 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 3075,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"pytorch",
|
|
"safetensors",
|
|
"mistral",
|
|
"finetuned",
|
|
"mistral-common",
|
|
"conversational",
|
|
"arxiv:2310.06825",
|
|
"text-generation-inference",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 5,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 7,
|
|
"parameters_active_b": 7,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-30b-a3b-instruct-2507",
|
|
"name": "Qwen3 30B A3B Instruct 2507",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-30B-A3B-Instruct-2507",
|
|
"description": "Open source model Qwen/Qwen3-30B-A3B-Instruct-2507. 766 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 766,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3_moe",
|
|
"conversational",
|
|
"arxiv:2402.17463",
|
|
"arxiv:2407.02490",
|
|
"arxiv:2501.15383",
|
|
"arxiv:2404.06654",
|
|
"arxiv:2505.09388",
|
|
"eval-results",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 21,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 30,
|
|
"parameters_active_b": 30,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "llm-jp-3-3.7b-instruct",
|
|
"name": "Llm Jp 3 3.7B Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/llm-jp/llm-jp-3-3.7b-instruct",
|
|
"description": "Open source model llm-jp/llm-jp-3-3.7b-instruct. 13 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 13,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llama",
|
|
"conversational",
|
|
"en",
|
|
"ja",
|
|
"text-generation-inference",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 5,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 3.7,
|
|
"parameters_active_b": 3.7,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "llama-3.2-3b-instruct",
|
|
"name": "Llama 3.2 3B Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct",
|
|
"description": "Open source model meta-llama/Llama-3.2-3B-Instruct. 1986 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 1986,
|
|
"language": "Python",
|
|
"license": "llama3.2",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llama",
|
|
"facebook",
|
|
"meta",
|
|
"pytorch",
|
|
"llama-3",
|
|
"conversational",
|
|
"en",
|
|
"de",
|
|
"fr",
|
|
"it",
|
|
"pt",
|
|
"hi",
|
|
"es",
|
|
"th",
|
|
"arxiv:2204.05149",
|
|
"arxiv:2405.16406",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 2,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 3,
|
|
"parameters_active_b": 3,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "distilgpt2",
|
|
"name": "Distilgpt2",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/distilbert/distilgpt2",
|
|
"description": "Open source model distilbert/distilgpt2. 609 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 609,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"pytorch",
|
|
"tf",
|
|
"jax",
|
|
"tflite",
|
|
"rust",
|
|
"coreml",
|
|
"safetensors",
|
|
"gpt2",
|
|
"exbert",
|
|
"en",
|
|
"dataset:openwebtext",
|
|
"arxiv:1910.01108",
|
|
"arxiv:2201.08542",
|
|
"arxiv:2203.12574",
|
|
"arxiv:1910.09700",
|
|
"arxiv:1503.02531",
|
|
"model-index",
|
|
"co2_eq_emissions",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-embedding-8b",
|
|
"name": "Qwen3 Embedding 8B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-Embedding-8B",
|
|
"description": "Open source model Qwen/Qwen3-Embedding-8B. 584 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 584,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"sentence-transformers",
|
|
"safetensors",
|
|
"qwen3",
|
|
"transformers",
|
|
"sentence-similarity",
|
|
"feature-extraction",
|
|
"text-embeddings-inference",
|
|
"arxiv:2506.05176",
|
|
"base_model:Qwen/Qwen3-8B-Base",
|
|
"base_model:finetune:Qwen/Qwen3-8B-Base",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 6,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 8,
|
|
"parameters_active_b": 8,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "meta-llama-3-8b",
|
|
"name": "Meta Llama 3 8B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/meta-llama/Meta-Llama-3-8B",
|
|
"description": "Open source model meta-llama/Meta-Llama-3-8B. 6458 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 6458,
|
|
"language": "Python",
|
|
"license": "llama3",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llama",
|
|
"facebook",
|
|
"meta",
|
|
"pytorch",
|
|
"llama-3",
|
|
"en",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 6,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 8,
|
|
"parameters_active_b": 8,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "tinyllama-1.1b-chat-v1.0",
|
|
"name": "Tinyllama 1.1B Chat V1.0",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0",
|
|
"description": "Open source model TinyLlama/TinyLlama-1.1B-Chat-v1.0. 1526 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 1526,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llama",
|
|
"conversational",
|
|
"en",
|
|
"dataset:cerebras/SlimPajama-627B",
|
|
"dataset:bigcode/starcoderdata",
|
|
"dataset:HuggingFaceH4/ultrachat_200k",
|
|
"dataset:HuggingFaceH4/ultrafeedback_binarized",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 1,
|
|
"parameters_active_b": 1,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "glm-4.7-flash",
|
|
"name": "Glm 4.7 Flash",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/zai-org/GLM-4.7-Flash",
|
|
"description": "Open source model zai-org/GLM-4.7-Flash. 1538 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 1538,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"glm4_moe_lite",
|
|
"conversational",
|
|
"en",
|
|
"zh",
|
|
"arxiv:2508.06471",
|
|
"eval-results",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "llama-3.2-1b",
|
|
"name": "Llama 3.2 1B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/meta-llama/Llama-3.2-1B",
|
|
"description": "Open source model meta-llama/Llama-3.2-1B. 2295 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 2295,
|
|
"language": "Python",
|
|
"license": "llama3.2",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llama",
|
|
"facebook",
|
|
"meta",
|
|
"pytorch",
|
|
"llama-3",
|
|
"en",
|
|
"de",
|
|
"fr",
|
|
"it",
|
|
"pt",
|
|
"hi",
|
|
"es",
|
|
"th",
|
|
"arxiv:2204.05149",
|
|
"arxiv:2405.16406",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 1,
|
|
"parameters_active_b": 1,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-32b",
|
|
"name": "Qwen3 32B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-32B",
|
|
"description": "Open source model Qwen/Qwen3-32B. 656 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 656,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3",
|
|
"conversational",
|
|
"arxiv:2309.00071",
|
|
"arxiv:2505.09388",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 22,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 32,
|
|
"parameters_active_b": 32,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "llama-3.2-1b-instruct-fp8-dynamic",
|
|
"name": "Llama 3.2 1B Instruct Fp8 Dynamic",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/RedHatAI/Llama-3.2-1B-Instruct-FP8-dynamic",
|
|
"description": "Open source model RedHatAI/Llama-3.2-1B-Instruct-FP8-dynamic. 3 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 3,
|
|
"language": "Python",
|
|
"license": "llama3.2",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"safetensors",
|
|
"llama",
|
|
"fp8",
|
|
"vllm",
|
|
"conversational",
|
|
"en",
|
|
"de",
|
|
"fr",
|
|
"it",
|
|
"pt",
|
|
"hi",
|
|
"es",
|
|
"th",
|
|
"base_model:meta-llama/Llama-3.2-1B-Instruct",
|
|
"base_model:quantized:meta-llama/Llama-3.2-1B-Instruct",
|
|
"compressed-tensors",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 1,
|
|
"parameters_active_b": 1,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen2.5-coder-1.5b-instruct",
|
|
"name": "Qwen2.5 Coder 1.5B Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct",
|
|
"description": "Open source model Qwen/Qwen2.5-Coder-1.5B-Instruct. 106 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 106,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"code",
|
|
"codeqwen",
|
|
"chat",
|
|
"qwen",
|
|
"qwen-coder",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2409.12186",
|
|
"arxiv:2407.10671",
|
|
"base_model:Qwen/Qwen2.5-Coder-1.5B",
|
|
"base_model:finetune:Qwen/Qwen2.5-Coder-1.5B",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 4,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 1.5,
|
|
"parameters_active_b": 1.5,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "meta-llama-3-8b-instruct",
|
|
"name": "Meta Llama 3 8B Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct",
|
|
"description": "Open source model meta-llama/Meta-Llama-3-8B-Instruct. 4380 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 4380,
|
|
"language": "Python",
|
|
"license": "llama3",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llama",
|
|
"facebook",
|
|
"meta",
|
|
"pytorch",
|
|
"llama-3",
|
|
"conversational",
|
|
"en",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 6,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 8,
|
|
"parameters_active_b": 8,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "gemma-3-1b-it",
|
|
"name": "Gemma 3 1B It",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/google/gemma-3-1b-it",
|
|
"description": "Open source model google/gemma-3-1b-it. 842 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 842,
|
|
"language": "Python",
|
|
"license": "gemma",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"gemma3_text",
|
|
"conversational",
|
|
"arxiv:1905.07830",
|
|
"arxiv:1905.10044",
|
|
"arxiv:1911.11641",
|
|
"arxiv:1904.09728",
|
|
"arxiv:1705.03551",
|
|
"arxiv:1911.01547",
|
|
"arxiv:1907.10641",
|
|
"arxiv:1903.00161",
|
|
"arxiv:2009.03300",
|
|
"arxiv:2304.06364",
|
|
"arxiv:2103.03874",
|
|
"arxiv:2110.14168",
|
|
"arxiv:2311.12022",
|
|
"arxiv:2108.07732",
|
|
"arxiv:2107.03374",
|
|
"arxiv:2210.03057",
|
|
"arxiv:2106.03193",
|
|
"arxiv:1910.11856",
|
|
"arxiv:2502.12404",
|
|
"arxiv:2502.21228",
|
|
"arxiv:2404.16816",
|
|
"arxiv:2104.12756",
|
|
"arxiv:2311.16502",
|
|
"arxiv:2203.10244",
|
|
"arxiv:2404.12390",
|
|
"arxiv:1810.12440",
|
|
"arxiv:1908.02660",
|
|
"arxiv:2312.11805",
|
|
"base_model:google/gemma-3-1b-pt",
|
|
"base_model:finetune:google/gemma-3-1b-pt",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 1,
|
|
"parameters_active_b": 1,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "phi-2",
|
|
"name": "Phi 2",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/microsoft/phi-2",
|
|
"description": "Open source model microsoft/phi-2. 3425 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 3425,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"phi",
|
|
"nlp",
|
|
"code",
|
|
"en",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen2.5-coder-7b-instruct",
|
|
"name": "Qwen2.5 Coder 7B Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct",
|
|
"description": "Open source model Qwen/Qwen2.5-Coder-7B-Instruct. 646 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 646,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"code",
|
|
"codeqwen",
|
|
"chat",
|
|
"qwen",
|
|
"qwen-coder",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2409.12186",
|
|
"arxiv:2309.00071",
|
|
"arxiv:2407.10671",
|
|
"base_model:Qwen/Qwen2.5-Coder-7B",
|
|
"base_model:finetune:Qwen/Qwen2.5-Coder-7B",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 5,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 7,
|
|
"parameters_active_b": 7,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen2.5-7b",
|
|
"name": "Qwen2.5 7B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen2.5-7B",
|
|
"description": "Open source model Qwen/Qwen2.5-7B. 264 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 264,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2407.10671",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 5,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 7,
|
|
"parameters_active_b": 7,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "deepseek-r1-distill-qwen-1.5b",
|
|
"name": "Deepseek R1 Distill Qwen 1.5B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
|
|
"description": "Open source model deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B. 1446 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 1446,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"conversational",
|
|
"arxiv:2501.12948",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 4,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 1.5,
|
|
"parameters_active_b": 1.5,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "deepseek-v3",
|
|
"name": "Deepseek V3",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/deepseek-ai/DeepSeek-V3",
|
|
"description": "Open source model deepseek-ai/DeepSeek-V3. 4024 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 4024,
|
|
"language": "Python",
|
|
"license": "unknown",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"deepseek_v3",
|
|
"conversational",
|
|
"custom_code",
|
|
"arxiv:2412.19437",
|
|
"eval-results",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"fp8",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "gpt2-large",
|
|
"name": "Gpt2 Large",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/openai-community/gpt2-large",
|
|
"description": "Open source model openai-community/gpt2-large. 344 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 344,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"pytorch",
|
|
"tf",
|
|
"jax",
|
|
"rust",
|
|
"onnx",
|
|
"safetensors",
|
|
"gpt2",
|
|
"en",
|
|
"arxiv:1910.09700",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "glm-4.7-flash-mlx-8bit",
|
|
"name": "Glm 4.7 Flash Mlx 8Bit",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/lmstudio-community/GLM-4.7-Flash-MLX-8bit",
|
|
"description": "Open source model lmstudio-community/GLM-4.7-Flash-MLX-8bit. 9 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 9,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"glm4_moe_lite",
|
|
"mlx",
|
|
"conversational",
|
|
"en",
|
|
"zh",
|
|
"base_model:zai-org/GLM-4.7-Flash",
|
|
"base_model:quantized:zai-org/GLM-4.7-Flash",
|
|
"endpoints_compatible",
|
|
"8-bit",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 6,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 8,
|
|
"parameters_active_b": 8,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "glm-4.7-flash-mlx-6bit",
|
|
"name": "Glm 4.7 Flash Mlx 6Bit",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/lmstudio-community/GLM-4.7-Flash-MLX-6bit",
|
|
"description": "Open source model lmstudio-community/GLM-4.7-Flash-MLX-6bit. 7 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 7,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"glm4_moe_lite",
|
|
"mlx",
|
|
"conversational",
|
|
"en",
|
|
"zh",
|
|
"base_model:zai-org/GLM-4.7-Flash",
|
|
"base_model:quantized:zai-org/GLM-4.7-Flash",
|
|
"endpoints_compatible",
|
|
"6-bit",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 4,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 6,
|
|
"parameters_active_b": 6,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-0.6b-fp8",
|
|
"name": "Qwen3 0.6B Fp8",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-0.6B-FP8",
|
|
"description": "Open source model Qwen/Qwen3-0.6B-FP8. 56 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 56,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3",
|
|
"conversational",
|
|
"arxiv:2505.09388",
|
|
"base_model:Qwen/Qwen3-0.6B",
|
|
"base_model:quantized:Qwen/Qwen3-0.6B",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"fp8",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 4,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0.6,
|
|
"parameters_active_b": 0.6,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "llama-3.1-8b",
|
|
"name": "Llama 3.1 8B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/meta-llama/Llama-3.1-8B",
|
|
"description": "Open source model meta-llama/Llama-3.1-8B. 2065 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 2065,
|
|
"language": "Python",
|
|
"license": "llama3.1",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llama",
|
|
"facebook",
|
|
"meta",
|
|
"pytorch",
|
|
"llama-3",
|
|
"en",
|
|
"de",
|
|
"fr",
|
|
"it",
|
|
"pt",
|
|
"hi",
|
|
"es",
|
|
"th",
|
|
"arxiv:2204.05149",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 6,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 8,
|
|
"parameters_active_b": 8,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "pythia-160m",
|
|
"name": "Pythia 160M",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/EleutherAI/pythia-160m",
|
|
"description": "Open source model EleutherAI/pythia-160m. 38 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 38,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"pytorch",
|
|
"safetensors",
|
|
"gpt_neox",
|
|
"causal-lm",
|
|
"pythia",
|
|
"en",
|
|
"dataset:EleutherAI/pile",
|
|
"arxiv:2304.01373",
|
|
"arxiv:2101.00027",
|
|
"arxiv:2201.07311",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "deepseek-r1-distill-qwen-32b",
|
|
"name": "Deepseek R1 Distill Qwen 32B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
|
|
"description": "Open source model deepseek-ai/DeepSeek-R1-Distill-Qwen-32B. 1517 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 1517,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"conversational",
|
|
"arxiv:2501.12948",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 22,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 32,
|
|
"parameters_active_b": 32,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "hunyuanocr",
|
|
"name": "Hunyuanocr",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/tencent/HunyuanOCR",
|
|
"description": "Open source model tencent/HunyuanOCR. 553 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 553,
|
|
"language": "Python",
|
|
"license": "other",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"hunyuan_vl",
|
|
"ocr",
|
|
"hunyuan",
|
|
"vision-language",
|
|
"image-to-text",
|
|
"1B",
|
|
"end-to-end",
|
|
"image-text-to-text",
|
|
"conversational",
|
|
"multilingual",
|
|
"arxiv:2511.19575",
|
|
"base_model:tencent/HunyuanOCR",
|
|
"base_model:finetune:tencent/HunyuanOCR",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-30b-a3b",
|
|
"name": "Qwen3 30B A3B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-30B-A3B",
|
|
"description": "Open source model Qwen/Qwen3-30B-A3B. 855 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 855,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3_moe",
|
|
"conversational",
|
|
"arxiv:2309.00071",
|
|
"arxiv:2505.09388",
|
|
"base_model:Qwen/Qwen3-30B-A3B-Base",
|
|
"base_model:finetune:Qwen/Qwen3-30B-A3B-Base",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 21,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 30,
|
|
"parameters_active_b": 3,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen2.5-0.5b",
|
|
"name": "Qwen2.5 0.5B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen2.5-0.5B",
|
|
"description": "Open source model Qwen/Qwen2.5-0.5B. 372 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 372,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2407.10671",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 4,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0.5,
|
|
"parameters_active_b": 0.5,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen2.5-32b-instruct-awq",
|
|
"name": "Qwen2.5 32B Instruct Awq",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen2.5-32B-Instruct-AWQ",
|
|
"description": "Open source model Qwen/Qwen2.5-32B-Instruct-AWQ. 94 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 94,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"chat",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2309.00071",
|
|
"arxiv:2407.10671",
|
|
"base_model:Qwen/Qwen2.5-32B-Instruct",
|
|
"base_model:quantized:Qwen/Qwen2.5-32B-Instruct",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"4-bit",
|
|
"awq",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 22,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 32,
|
|
"parameters_active_b": 32,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "nvidia-nemotron-3-nano-30b-a3b-fp8",
|
|
"name": "Nvidia Nemotron 3 Nano 30B A3B Fp8",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8",
|
|
"description": "Open source model nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8. 284 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 284,
|
|
"language": "Python",
|
|
"license": "other",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"nemotron_h",
|
|
"feature-extraction",
|
|
"nvidia",
|
|
"pytorch",
|
|
"conversational",
|
|
"custom_code",
|
|
"en",
|
|
"es",
|
|
"fr",
|
|
"de",
|
|
"ja",
|
|
"it",
|
|
"dataset:nvidia/Nemotron-Pretraining-Code-v1",
|
|
"dataset:nvidia/Nemotron-CC-v2",
|
|
"dataset:nvidia/Nemotron-Pretraining-SFT-v1",
|
|
"dataset:nvidia/Nemotron-CC-Math-v1",
|
|
"dataset:nvidia/Nemotron-Pretraining-Code-v2",
|
|
"dataset:nvidia/Nemotron-Pretraining-Specialized-v1",
|
|
"dataset:nvidia/Nemotron-CC-v2.1",
|
|
"dataset:nvidia/Nemotron-CC-Code-v1",
|
|
"dataset:nvidia/Nemotron-Pretraining-Dataset-sample",
|
|
"dataset:nvidia/Nemotron-Competitive-Programming-v1",
|
|
"dataset:nvidia/Nemotron-Math-v2",
|
|
"dataset:nvidia/Nemotron-Agentic-v1",
|
|
"dataset:nvidia/Nemotron-Math-Proofs-v1",
|
|
"dataset:nvidia/Nemotron-Instruction-Following-Chat-v1",
|
|
"dataset:nvidia/Nemotron-Science-v1",
|
|
"dataset:nvidia/Nemotron-3-Nano-RL-Training-Blend",
|
|
"arxiv:2512.20848",
|
|
"arxiv:2512.20856",
|
|
"base_model:nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
|
|
"base_model:quantized:nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
|
|
"eval-results",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 21,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 30,
|
|
"parameters_active_b": 3,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen2.5-14b-instruct",
|
|
"name": "Qwen2.5 14B Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen2.5-14B-Instruct",
|
|
"description": "Open source model Qwen/Qwen2.5-14B-Instruct. 312 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 312,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"chat",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2309.00071",
|
|
"arxiv:2407.10671",
|
|
"base_model:Qwen/Qwen2.5-14B",
|
|
"base_model:finetune:Qwen/Qwen2.5-14B",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "16GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 10,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 14,
|
|
"parameters_active_b": 14,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "nvidia-nemotron-3-nano-30b-a3b-bf16",
|
|
"name": "Nvidia Nemotron 3 Nano 30B A3B Bf16",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
|
|
"description": "Open source model nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16. 634 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 634,
|
|
"language": "Python",
|
|
"license": "other",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"nemotron_h",
|
|
"feature-extraction",
|
|
"nvidia",
|
|
"pytorch",
|
|
"conversational",
|
|
"custom_code",
|
|
"en",
|
|
"es",
|
|
"fr",
|
|
"de",
|
|
"ja",
|
|
"it",
|
|
"dataset:nvidia/Nemotron-Pretraining-Code-v1",
|
|
"dataset:nvidia/Nemotron-CC-v2",
|
|
"dataset:nvidia/Nemotron-Pretraining-SFT-v1",
|
|
"dataset:nvidia/Nemotron-CC-Math-v1",
|
|
"dataset:nvidia/Nemotron-Pretraining-Code-v2",
|
|
"dataset:nvidia/Nemotron-Pretraining-Specialized-v1",
|
|
"dataset:nvidia/Nemotron-CC-v2.1",
|
|
"dataset:nvidia/Nemotron-CC-Code-v1",
|
|
"dataset:nvidia/Nemotron-Pretraining-Dataset-sample",
|
|
"dataset:nvidia/Nemotron-Competitive-Programming-v1",
|
|
"dataset:nvidia/Nemotron-Math-v2",
|
|
"dataset:nvidia/Nemotron-Agentic-v1",
|
|
"dataset:nvidia/Nemotron-Math-Proofs-v1",
|
|
"dataset:nvidia/Nemotron-Instruction-Following-Chat-v1",
|
|
"dataset:nvidia/Nemotron-Science-v1",
|
|
"dataset:nvidia/Nemotron-3-Nano-RL-Training-Blend",
|
|
"arxiv:2512.20848",
|
|
"arxiv:2512.20856",
|
|
"eval-results",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 21,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 30,
|
|
"parameters_active_b": 3,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "openelm-1_1b-instruct",
|
|
"name": "Openelm 1_1B Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/apple/OpenELM-1_1B-Instruct",
|
|
"description": "Open source model apple/OpenELM-1_1B-Instruct. 72 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 72,
|
|
"language": "Python",
|
|
"license": "apple-amlr",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"openelm",
|
|
"custom_code",
|
|
"arxiv:2404.14619",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 1,
|
|
"parameters_active_b": 1,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "tiny-random-llamaforcausallm",
|
|
"name": "Tiny Random Llamaforcausallm",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/hmellor/tiny-random-LlamaForCausalLM",
|
|
"description": "Open source model hmellor/tiny-random-LlamaForCausalLM. 0 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 0,
|
|
"language": "Python",
|
|
"license": "unknown",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llama",
|
|
"conversational",
|
|
"arxiv:1910.09700",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-next-80b-a3b-instruct",
|
|
"name": "Qwen3 Next 80B A3B Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-Next-80B-A3B-Instruct",
|
|
"description": "Open source model Qwen/Qwen3-Next-80B-A3B-Instruct. 937 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 937,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3_next",
|
|
"conversational",
|
|
"arxiv:2309.00071",
|
|
"arxiv:2404.06654",
|
|
"arxiv:2505.09388",
|
|
"arxiv:2501.15383",
|
|
"eval-results",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 56,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 80,
|
|
"parameters_active_b": 3,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "h2ovl-mississippi-800m",
|
|
"name": "H2Ovl Mississippi 800M",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/h2oai/h2ovl-mississippi-800m",
|
|
"description": "Open source model h2oai/h2ovl-mississippi-800m. 39 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 39,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"h2ovl_chat",
|
|
"feature-extraction",
|
|
"gpt",
|
|
"llm",
|
|
"multimodal large language model",
|
|
"ocr",
|
|
"conversational",
|
|
"custom_code",
|
|
"en",
|
|
"arxiv:2410.13611",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": true
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "bloomz-560m",
|
|
"name": "Bloomz 560M",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/bigscience/bloomz-560m",
|
|
"description": "Open source model bigscience/bloomz-560m. 137 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 137,
|
|
"language": "Python",
|
|
"license": "bigscience-bloom-rail-1.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"pytorch",
|
|
"tensorboard",
|
|
"safetensors",
|
|
"bloom",
|
|
"ak",
|
|
"ar",
|
|
"as",
|
|
"bm",
|
|
"bn",
|
|
"ca",
|
|
"code",
|
|
"en",
|
|
"es",
|
|
"eu",
|
|
"fon",
|
|
"fr",
|
|
"gu",
|
|
"hi",
|
|
"id",
|
|
"ig",
|
|
"ki",
|
|
"kn",
|
|
"lg",
|
|
"ln",
|
|
"ml",
|
|
"mr",
|
|
"ne",
|
|
"nso",
|
|
"ny",
|
|
"or",
|
|
"pa",
|
|
"pt",
|
|
"rn",
|
|
"rw",
|
|
"sn",
|
|
"st",
|
|
"sw",
|
|
"ta",
|
|
"te",
|
|
"tn",
|
|
"ts",
|
|
"tum",
|
|
"tw",
|
|
"ur",
|
|
"vi",
|
|
"wo",
|
|
"xh",
|
|
"yo",
|
|
"zh",
|
|
"zu",
|
|
"dataset:bigscience/xP3",
|
|
"arxiv:2211.01786",
|
|
"model-index",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen2.5-1.5b-quantized.w8a8",
|
|
"name": "Qwen2.5 1.5B Quantized.W8A8",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/RedHatAI/Qwen2.5-1.5B-quantized.w8a8",
|
|
"description": "Open source model RedHatAI/Qwen2.5-1.5B-quantized.w8a8. 2 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 2,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"safetensors",
|
|
"qwen2",
|
|
"chat",
|
|
"neuralmagic",
|
|
"llmcompressor",
|
|
"conversational",
|
|
"en",
|
|
"base_model:Qwen/Qwen2.5-1.5B",
|
|
"base_model:quantized:Qwen/Qwen2.5-1.5B",
|
|
"8-bit",
|
|
"compressed-tensors",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 4,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 5,
|
|
"parameters_active_b": 5,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "h2ovl-mississippi-2b",
|
|
"name": "H2Ovl Mississippi 2B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/h2oai/h2ovl-mississippi-2b",
|
|
"description": "Open source model h2oai/h2ovl-mississippi-2b. 40 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 40,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"h2ovl_chat",
|
|
"feature-extraction",
|
|
"gpt",
|
|
"llm",
|
|
"multimodal large language model",
|
|
"ocr",
|
|
"conversational",
|
|
"custom_code",
|
|
"en",
|
|
"arxiv:2410.13611",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 2,
|
|
"parameters_active_b": 2,
|
|
"is_multimodal": true
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "llava-v1.5-7b",
|
|
"name": "Llava V1.5 7B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/liuhaotian/llava-v1.5-7b",
|
|
"description": "Open source model liuhaotian/llava-v1.5-7b. 537 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 537,
|
|
"language": "Python",
|
|
"license": "unknown",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"pytorch",
|
|
"llava",
|
|
"image-text-to-text",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 5,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 7,
|
|
"parameters_active_b": 7,
|
|
"is_multimodal": true
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "t5-3b",
|
|
"name": "T5 3B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/google-t5/t5-3b",
|
|
"description": "Open source model google-t5/t5-3b. 51 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 51,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"pytorch",
|
|
"tf",
|
|
"safetensors",
|
|
"t5",
|
|
"summarization",
|
|
"translation",
|
|
"en",
|
|
"fr",
|
|
"ro",
|
|
"de",
|
|
"multilingual",
|
|
"dataset:c4",
|
|
"arxiv:1805.12471",
|
|
"arxiv:1708.00055",
|
|
"arxiv:1704.05426",
|
|
"arxiv:1606.05250",
|
|
"arxiv:1808.09121",
|
|
"arxiv:1810.12885",
|
|
"arxiv:1905.10044",
|
|
"arxiv:1910.09700",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 2,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 3,
|
|
"parameters_active_b": 3,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen2.5-14b-instruct-awq",
|
|
"name": "Qwen2.5 14B Instruct Awq",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-AWQ",
|
|
"description": "Open source model Qwen/Qwen2.5-14B-Instruct-AWQ. 27 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 27,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"chat",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2309.00071",
|
|
"arxiv:2407.10671",
|
|
"base_model:Qwen/Qwen2.5-14B-Instruct",
|
|
"base_model:quantized:Qwen/Qwen2.5-14B-Instruct",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"4-bit",
|
|
"awq",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "16GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 10,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 14,
|
|
"parameters_active_b": 14,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "llama-3.2-3b",
|
|
"name": "Llama 3.2 3B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/meta-llama/Llama-3.2-3B",
|
|
"description": "Open source model meta-llama/Llama-3.2-3B. 697 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 697,
|
|
"language": "Python",
|
|
"license": "llama3.2",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llama",
|
|
"facebook",
|
|
"meta",
|
|
"pytorch",
|
|
"llama-3",
|
|
"en",
|
|
"de",
|
|
"fr",
|
|
"it",
|
|
"pt",
|
|
"hi",
|
|
"es",
|
|
"th",
|
|
"arxiv:2204.05149",
|
|
"arxiv:2405.16406",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 2,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 3,
|
|
"parameters_active_b": 3,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "phi-3-mini-4k-instruct-gptq-4bit",
|
|
"name": "Phi 3 Mini 4K Instruct Gptq 4Bit",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/kaitchup/Phi-3-mini-4k-instruct-gptq-4bit",
|
|
"description": "Open source model kaitchup/Phi-3-mini-4k-instruct-gptq-4bit. 2 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 2,
|
|
"language": "Python",
|
|
"license": "unknown",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"phi3",
|
|
"conversational",
|
|
"custom_code",
|
|
"arxiv:1910.09700",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"4-bit",
|
|
"gptq",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 3,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 4,
|
|
"parameters_active_b": 4,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen2.5-72b-instruct-awq",
|
|
"name": "Qwen2.5 72B Instruct Awq",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct-AWQ",
|
|
"description": "Open source model Qwen/Qwen2.5-72B-Instruct-AWQ. 74 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 74,
|
|
"language": "Python",
|
|
"license": "other",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"chat",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2309.00071",
|
|
"arxiv:2407.10671",
|
|
"base_model:Qwen/Qwen2.5-72B-Instruct",
|
|
"base_model:quantized:Qwen/Qwen2.5-72B-Instruct",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"4-bit",
|
|
"awq",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 50,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 72,
|
|
"parameters_active_b": 72,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "smollm2-135m",
|
|
"name": "Smollm2 135M",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/HuggingFaceTB/SmolLM2-135M",
|
|
"description": "Open source model HuggingFaceTB/SmolLM2-135M. 166 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 166,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llama",
|
|
"en",
|
|
"arxiv:2502.02737",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "llama-3.3-70b-instruct",
|
|
"name": "Llama 3.3 70B Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct",
|
|
"description": "Open source model meta-llama/Llama-3.3-70B-Instruct. 2658 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 2658,
|
|
"language": "Python",
|
|
"license": "llama3.3",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llama",
|
|
"facebook",
|
|
"meta",
|
|
"pytorch",
|
|
"llama-3",
|
|
"conversational",
|
|
"en",
|
|
"fr",
|
|
"it",
|
|
"pt",
|
|
"hi",
|
|
"es",
|
|
"th",
|
|
"de",
|
|
"arxiv:2204.05149",
|
|
"base_model:meta-llama/Llama-3.1-70B",
|
|
"base_model:finetune:meta-llama/Llama-3.1-70B",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 49,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 70,
|
|
"parameters_active_b": 70,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-30b-a3b-instruct-2507-fp8",
|
|
"name": "Qwen3 30B A3B Instruct 2507 Fp8",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-30B-A3B-Instruct-2507-FP8",
|
|
"description": "Open source model Qwen/Qwen3-30B-A3B-Instruct-2507-FP8. 112 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 112,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3_moe",
|
|
"conversational",
|
|
"arxiv:2505.09388",
|
|
"base_model:Qwen/Qwen3-30B-A3B-Instruct-2507",
|
|
"base_model:quantized:Qwen/Qwen3-30B-A3B-Instruct-2507",
|
|
"endpoints_compatible",
|
|
"fp8",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 21,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 30,
|
|
"parameters_active_b": 3,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen2.5-coder-32b-instruct",
|
|
"name": "Qwen2.5 Coder 32B Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct",
|
|
"description": "Open source model Qwen/Qwen2.5-Coder-32B-Instruct. 1995 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 1995,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"code",
|
|
"codeqwen",
|
|
"chat",
|
|
"qwen",
|
|
"qwen-coder",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2409.12186",
|
|
"arxiv:2309.00071",
|
|
"arxiv:2407.10671",
|
|
"base_model:Qwen/Qwen2.5-Coder-32B",
|
|
"base_model:finetune:Qwen/Qwen2.5-Coder-32B",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 22,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 32,
|
|
"parameters_active_b": 32,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-235b-a22b-instruct-2507-fp8",
|
|
"name": "Qwen3 235B A22B Instruct 2507 Fp8",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-235B-A22B-Instruct-2507-FP8",
|
|
"description": "Open source model Qwen/Qwen3-235B-A22B-Instruct-2507-FP8. 145 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 145,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3_moe",
|
|
"conversational",
|
|
"arxiv:2505.09388",
|
|
"base_model:Qwen/Qwen3-235B-A22B-Instruct-2507",
|
|
"base_model:quantized:Qwen/Qwen3-235B-A22B-Instruct-2507",
|
|
"endpoints_compatible",
|
|
"fp8",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 164,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 235,
|
|
"parameters_active_b": 22,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "deepseek-r1-distill-qwen-7b",
|
|
"name": "Deepseek R1 Distill Qwen 7B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
|
|
"description": "Open source model deepseek-ai/DeepSeek-R1-Distill-Qwen-7B. 787 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 787,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"conversational",
|
|
"arxiv:2501.12948",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 5,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 7,
|
|
"parameters_active_b": 7,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "phi-3-mini-4k-instruct",
|
|
"name": "Phi 3 Mini 4K Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct",
|
|
"description": "Open source model microsoft/Phi-3-mini-4k-instruct. 1386 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 1386,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"phi3",
|
|
"nlp",
|
|
"code",
|
|
"conversational",
|
|
"custom_code",
|
|
"en",
|
|
"fr",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 3,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 4,
|
|
"parameters_active_b": 4,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-14b",
|
|
"name": "Qwen3 14B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-14B",
|
|
"description": "Open source model Qwen/Qwen3-14B. 366 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 366,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3",
|
|
"conversational",
|
|
"arxiv:2309.00071",
|
|
"arxiv:2505.09388",
|
|
"base_model:Qwen/Qwen3-14B-Base",
|
|
"base_model:finetune:Qwen/Qwen3-14B-Base",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "16GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 10,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 14,
|
|
"parameters_active_b": 14,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen2.5-coder-1.5b",
|
|
"name": "Qwen2.5 Coder 1.5B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B",
|
|
"description": "Open source model Qwen/Qwen2.5-Coder-1.5B. 81 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 81,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"code",
|
|
"qwen",
|
|
"qwen-coder",
|
|
"codeqwen",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2409.12186",
|
|
"arxiv:2407.10671",
|
|
"base_model:Qwen/Qwen2.5-1.5B",
|
|
"base_model:finetune:Qwen/Qwen2.5-1.5B",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 4,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 5,
|
|
"parameters_active_b": 5,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "llama-3.1-70b-instruct",
|
|
"name": "Llama 3.1 70B Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct",
|
|
"description": "Open source model meta-llama/Llama-3.1-70B-Instruct. 890 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 890,
|
|
"language": "Python",
|
|
"license": "llama3.1",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llama",
|
|
"facebook",
|
|
"meta",
|
|
"pytorch",
|
|
"llama-3",
|
|
"conversational",
|
|
"en",
|
|
"de",
|
|
"fr",
|
|
"it",
|
|
"pt",
|
|
"hi",
|
|
"es",
|
|
"th",
|
|
"arxiv:2204.05149",
|
|
"base_model:meta-llama/Llama-3.1-70B",
|
|
"base_model:finetune:meta-llama/Llama-3.1-70B",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 49,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 70,
|
|
"parameters_active_b": 70,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "hunyuanimage-3.0",
|
|
"name": "Hunyuanimage 3.0",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/tencent/HunyuanImage-3.0",
|
|
"description": "Open source model tencent/HunyuanImage-3.0. 640 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 640,
|
|
"language": "Python",
|
|
"license": "other",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"hunyuan_image_3_moe",
|
|
"text-to-image",
|
|
"custom_code",
|
|
"arxiv:2509.23951",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": true
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen2.5-coder-7b-instruct-awq",
|
|
"name": "Qwen2.5 Coder 7B Instruct Awq",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct-AWQ",
|
|
"description": "Open source model Qwen/Qwen2.5-Coder-7B-Instruct-AWQ. 19 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 19,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"code",
|
|
"codeqwen",
|
|
"chat",
|
|
"qwen",
|
|
"qwen-coder",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2409.12186",
|
|
"arxiv:2309.00071",
|
|
"arxiv:2407.10671",
|
|
"base_model:Qwen/Qwen2.5-Coder-7B-Instruct",
|
|
"base_model:quantized:Qwen/Qwen2.5-Coder-7B-Instruct",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"4-bit",
|
|
"awq",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 5,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 7,
|
|
"parameters_active_b": 7,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-coder-30b-a3b-instruct",
|
|
"name": "Qwen3 Coder 30B A3B Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-Coder-30B-A3B-Instruct",
|
|
"description": "Open source model Qwen/Qwen3-Coder-30B-A3B-Instruct. 945 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 945,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3_moe",
|
|
"conversational",
|
|
"arxiv:2505.09388",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 21,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 30,
|
|
"parameters_active_b": 3,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "deepseek-r1-0528",
|
|
"name": "Deepseek R1 0528",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
|
|
"description": "Open source model deepseek-ai/DeepSeek-R1-0528. 2400 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 2400,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"deepseek_v3",
|
|
"conversational",
|
|
"custom_code",
|
|
"arxiv:2501.12948",
|
|
"eval-results",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"fp8",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 470,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 671,
|
|
"parameters_active_b": 37,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "tiny-random-llama-3",
|
|
"name": "Tiny Random Llama 3",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/llamafactory/tiny-random-Llama-3",
|
|
"description": "Open source model llamafactory/tiny-random-Llama-3. 3 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 3,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llama",
|
|
"conversational",
|
|
"text-generation-inference",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen2.5-coder-32b-instruct-awq",
|
|
"name": "Qwen2.5 Coder 32B Instruct Awq",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct-AWQ",
|
|
"description": "Open source model Qwen/Qwen2.5-Coder-32B-Instruct-AWQ. 33 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 33,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"code",
|
|
"codeqwen",
|
|
"chat",
|
|
"qwen",
|
|
"qwen-coder",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2409.12186",
|
|
"arxiv:2309.00071",
|
|
"arxiv:2407.10671",
|
|
"base_model:Qwen/Qwen2.5-Coder-32B-Instruct",
|
|
"base_model:quantized:Qwen/Qwen2.5-Coder-32B-Instruct",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"4-bit",
|
|
"awq",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 22,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 32,
|
|
"parameters_active_b": 32,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "mistral-7b-instruct-v0.1",
|
|
"name": "Mistral 7B Instruct V0.1",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1",
|
|
"description": "Open source model mistralai/Mistral-7B-Instruct-v0.1. 1826 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 1826,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"pytorch",
|
|
"safetensors",
|
|
"mistral",
|
|
"finetuned",
|
|
"mistral-common",
|
|
"conversational",
|
|
"arxiv:2310.06825",
|
|
"base_model:mistralai/Mistral-7B-v0.1",
|
|
"base_model:finetune:mistralai/Mistral-7B-v0.1",
|
|
"text-generation-inference",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 5,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 7,
|
|
"parameters_active_b": 7,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "gpt-oss-20b-mxfp4-q8",
|
|
"name": "Gpt Oss 20B Mxfp4 Q8",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/mlx-community/gpt-oss-20b-MXFP4-Q8",
|
|
"description": "Open source model mlx-community/gpt-oss-20b-MXFP4-Q8. 31 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 31,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"mlx",
|
|
"safetensors",
|
|
"gpt_oss",
|
|
"vllm",
|
|
"conversational",
|
|
"base_model:openai/gpt-oss-20b",
|
|
"base_model:quantized:openai/gpt-oss-20b",
|
|
"4-bit",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "16GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 14,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 20,
|
|
"parameters_active_b": 20,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-embedding-4b",
|
|
"name": "Qwen3 Embedding 4B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-Embedding-4B",
|
|
"description": "Open source model Qwen/Qwen3-Embedding-4B. 224 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 224,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"sentence-transformers",
|
|
"safetensors",
|
|
"qwen3",
|
|
"transformers",
|
|
"sentence-similarity",
|
|
"feature-extraction",
|
|
"text-embeddings-inference",
|
|
"arxiv:2506.05176",
|
|
"base_model:Qwen/Qwen3-4B-Base",
|
|
"base_model:finetune:Qwen/Qwen3-4B-Base",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 3,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 4,
|
|
"parameters_active_b": 4,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen2.5-1.5b-instruct-awq",
|
|
"name": "Qwen2.5 1.5B Instruct Awq",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-AWQ",
|
|
"description": "Open source model Qwen/Qwen2.5-1.5B-Instruct-AWQ. 6 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 6,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"chat",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2407.10671",
|
|
"base_model:Qwen/Qwen2.5-1.5B-Instruct",
|
|
"base_model:quantized:Qwen/Qwen2.5-1.5B-Instruct",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"4-bit",
|
|
"awq",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 4,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 1.5,
|
|
"parameters_active_b": 1.5,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "meta-llama-3.1-8b-instruct-fp8",
|
|
"name": "Meta Llama 3.1 8B Instruct Fp8",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8",
|
|
"description": "Open source model RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8. 44 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 44,
|
|
"language": "Python",
|
|
"license": "llama3.1",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llama",
|
|
"fp8",
|
|
"vllm",
|
|
"conversational",
|
|
"en",
|
|
"de",
|
|
"fr",
|
|
"it",
|
|
"pt",
|
|
"hi",
|
|
"es",
|
|
"th",
|
|
"base_model:meta-llama/Llama-3.1-8B-Instruct",
|
|
"base_model:quantized:meta-llama/Llama-3.1-8B-Instruct",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"compressed-tensors",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 6,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 8,
|
|
"parameters_active_b": 8,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "phi-4",
|
|
"name": "Phi 4",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/microsoft/phi-4",
|
|
"description": "Open source model microsoft/phi-4. 2220 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 2220,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"phi3",
|
|
"phi",
|
|
"nlp",
|
|
"math",
|
|
"code",
|
|
"chat",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2412.08905",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "deepseek-r1",
|
|
"name": "Deepseek R1",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/deepseek-ai/DeepSeek-R1",
|
|
"description": "Open source model deepseek-ai/DeepSeek-R1. 13011 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 13011,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"deepseek_v3",
|
|
"conversational",
|
|
"custom_code",
|
|
"arxiv:2501.12948",
|
|
"eval-results",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"fp8",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "llama-3.2-1b-instruct-fp8",
|
|
"name": "Llama 3.2 1B Instruct Fp8",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/RedHatAI/Llama-3.2-1B-Instruct-FP8",
|
|
"description": "Open source model RedHatAI/Llama-3.2-1B-Instruct-FP8. 3 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 3,
|
|
"language": "Python",
|
|
"license": "llama3.2",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"safetensors",
|
|
"llama",
|
|
"llama-3",
|
|
"neuralmagic",
|
|
"llmcompressor",
|
|
"conversational",
|
|
"en",
|
|
"de",
|
|
"fr",
|
|
"it",
|
|
"pt",
|
|
"hi",
|
|
"es",
|
|
"th",
|
|
"base_model:meta-llama/Llama-3.2-1B-Instruct",
|
|
"base_model:quantized:meta-llama/Llama-3.2-1B-Instruct",
|
|
"compressed-tensors",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 1,
|
|
"parameters_active_b": 1,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "llama-3.1-405b",
|
|
"name": "Llama 3.1 405B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/meta-llama/Llama-3.1-405B",
|
|
"description": "Open source model meta-llama/Llama-3.1-405B. 961 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 961,
|
|
"language": "Python",
|
|
"license": "llama3.1",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llama",
|
|
"facebook",
|
|
"meta",
|
|
"pytorch",
|
|
"llama-3",
|
|
"en",
|
|
"de",
|
|
"fr",
|
|
"it",
|
|
"pt",
|
|
"hi",
|
|
"es",
|
|
"th",
|
|
"arxiv:2204.05149",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 284,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 405,
|
|
"parameters_active_b": 405,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-4b-thinking-2507",
|
|
"name": "Qwen3 4B Thinking 2507",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-4B-Thinking-2507",
|
|
"description": "Open source model Qwen/Qwen3-4B-Thinking-2507. 548 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 548,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3",
|
|
"conversational",
|
|
"arxiv:2505.09388",
|
|
"eval-results",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 3,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 4,
|
|
"parameters_active_b": 4,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "gpt2-medium",
|
|
"name": "Gpt2 Medium",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/openai-community/gpt2-medium",
|
|
"description": "Open source model openai-community/gpt2-medium. 193 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 193,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"pytorch",
|
|
"tf",
|
|
"jax",
|
|
"rust",
|
|
"onnx",
|
|
"safetensors",
|
|
"gpt2",
|
|
"en",
|
|
"arxiv:1910.09700",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "tiny-gpt2",
|
|
"name": "Tiny Gpt2",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/sshleifer/tiny-gpt2",
|
|
"description": "Open source model sshleifer/tiny-gpt2. 34 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 34,
|
|
"language": "Python",
|
|
"license": "unknown",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"pytorch",
|
|
"tf",
|
|
"jax",
|
|
"gpt2",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "hermes-3-llama-3.1-8b",
|
|
"name": "Hermes 3 Llama 3.1 8B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B",
|
|
"description": "Open source model NousResearch/Hermes-3-Llama-3.1-8B. 385 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 385,
|
|
"language": "Python",
|
|
"license": "llama3",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llama",
|
|
"Llama-3",
|
|
"instruct",
|
|
"finetune",
|
|
"chatml",
|
|
"gpt4",
|
|
"synthetic data",
|
|
"distillation",
|
|
"function calling",
|
|
"json mode",
|
|
"axolotl",
|
|
"roleplaying",
|
|
"chat",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2408.11857",
|
|
"base_model:meta-llama/Llama-3.1-8B",
|
|
"base_model:finetune:meta-llama/Llama-3.1-8B",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 6,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 8,
|
|
"parameters_active_b": 8,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "phi-3.5-vision-instruct",
|
|
"name": "Phi 3.5 Vision Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/microsoft/Phi-3.5-vision-instruct",
|
|
"description": "Open source model microsoft/Phi-3.5-vision-instruct. 726 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 726,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"phi3_v",
|
|
"nlp",
|
|
"code",
|
|
"vision",
|
|
"image-text-to-text",
|
|
"conversational",
|
|
"custom_code",
|
|
"multilingual",
|
|
"arxiv:2404.14219",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": true
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "minimax-m2",
|
|
"name": "Minimax M2",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/MiniMaxAI/MiniMax-M2",
|
|
"description": "Open source model MiniMaxAI/MiniMax-M2. 1485 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 1485,
|
|
"language": "Python",
|
|
"license": "other",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"minimax_m2",
|
|
"conversational",
|
|
"custom_code",
|
|
"arxiv:2504.07164",
|
|
"arxiv:2509.06501",
|
|
"arxiv:2509.13160",
|
|
"eval-results",
|
|
"endpoints_compatible",
|
|
"fp8",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "deepseek-r1-distill-llama-8b",
|
|
"name": "Deepseek R1 Distill Llama 8B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
|
|
"description": "Open source model deepseek-ai/DeepSeek-R1-Distill-Llama-8B. 843 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 843,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llama",
|
|
"conversational",
|
|
"arxiv:2501.12948",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 6,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 8,
|
|
"parameters_active_b": 8,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-14b-awq",
|
|
"name": "Qwen3 14B Awq",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-14B-AWQ",
|
|
"description": "Open source model Qwen/Qwen3-14B-AWQ. 57 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 57,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3",
|
|
"conversational",
|
|
"arxiv:2309.00071",
|
|
"arxiv:2505.09388",
|
|
"base_model:Qwen/Qwen3-14B",
|
|
"base_model:quantized:Qwen/Qwen3-14B",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"4-bit",
|
|
"awq",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "16GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 10,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 14,
|
|
"parameters_active_b": 14,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-235b-a22b",
|
|
"name": "Qwen3 235B A22B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-235B-A22B",
|
|
"description": "Open source model Qwen/Qwen3-235B-A22B. 1075 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 1075,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3_moe",
|
|
"conversational",
|
|
"arxiv:2309.00071",
|
|
"arxiv:2505.09388",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 164,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 235,
|
|
"parameters_active_b": 22,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "meta-llama-3.1-8b-instruct-awq-int4",
|
|
"name": "Meta Llama 3.1 8B Instruct Awq Int4",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4",
|
|
"description": "Open source model hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4. 87 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 87,
|
|
"language": "Python",
|
|
"license": "llama3.1",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llama",
|
|
"llama-3.1",
|
|
"meta",
|
|
"autoawq",
|
|
"conversational",
|
|
"en",
|
|
"de",
|
|
"fr",
|
|
"it",
|
|
"pt",
|
|
"hi",
|
|
"es",
|
|
"th",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"4-bit",
|
|
"awq",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 6,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 8,
|
|
"parameters_active_b": 8,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "lfm2.5-1.2b-instruct-mlx-8bit",
|
|
"name": "Lfm2.5 1.2B Instruct Mlx 8Bit",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/lmstudio-community/LFM2.5-1.2B-Instruct-MLX-8bit",
|
|
"description": "Open source model lmstudio-community/LFM2.5-1.2B-Instruct-MLX-8bit. 1 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 1,
|
|
"language": "Python",
|
|
"license": "other",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"lfm2",
|
|
"liquid",
|
|
"lfm2.5",
|
|
"edge",
|
|
"mlx",
|
|
"conversational",
|
|
"en",
|
|
"ar",
|
|
"zh",
|
|
"fr",
|
|
"de",
|
|
"ja",
|
|
"ko",
|
|
"es",
|
|
"base_model:LiquidAI/LFM2.5-1.2B-Instruct",
|
|
"base_model:quantized:LiquidAI/LFM2.5-1.2B-Instruct",
|
|
"endpoints_compatible",
|
|
"8-bit",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 1.2,
|
|
"parameters_active_b": 1.2,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "glm-4.7-flash-gguf",
|
|
"name": "Glm 4.7 Flash Gguf",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/unsloth/GLM-4.7-Flash-GGUF",
|
|
"description": "Open source model unsloth/GLM-4.7-Flash-GGUF. 482 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 482,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"gguf",
|
|
"unsloth",
|
|
"en",
|
|
"zh",
|
|
"arxiv:2508.06471",
|
|
"base_model:zai-org/GLM-4.7-Flash",
|
|
"base_model:quantized:zai-org/GLM-4.7-Flash",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us",
|
|
"imatrix",
|
|
"conversational"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "deepseek-r1-distill-qwen-14b",
|
|
"name": "Deepseek R1 Distill Qwen 14B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
|
|
"description": "Open source model deepseek-ai/DeepSeek-R1-Distill-Qwen-14B. 603 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 603,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"conversational",
|
|
"arxiv:2501.12948",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "16GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 10,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 14,
|
|
"parameters_active_b": 14,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "lfm2.5-1.2b-instruct-mlx-6bit",
|
|
"name": "Lfm2.5 1.2B Instruct Mlx 6Bit",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/lmstudio-community/LFM2.5-1.2B-Instruct-MLX-6bit",
|
|
"description": "Open source model lmstudio-community/LFM2.5-1.2B-Instruct-MLX-6bit. 4 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 4,
|
|
"language": "Python",
|
|
"license": "other",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"lfm2",
|
|
"liquid",
|
|
"lfm2.5",
|
|
"edge",
|
|
"mlx",
|
|
"conversational",
|
|
"en",
|
|
"ar",
|
|
"zh",
|
|
"fr",
|
|
"de",
|
|
"ja",
|
|
"ko",
|
|
"es",
|
|
"base_model:LiquidAI/LFM2.5-1.2B-Instruct",
|
|
"base_model:quantized:LiquidAI/LFM2.5-1.2B-Instruct",
|
|
"endpoints_compatible",
|
|
"6-bit",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 1.2,
|
|
"parameters_active_b": 1.2,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "lfm2.5-1.2b-instruct-mlx-4bit",
|
|
"name": "Lfm2.5 1.2B Instruct Mlx 4Bit",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/lmstudio-community/LFM2.5-1.2B-Instruct-MLX-4bit",
|
|
"description": "Open source model lmstudio-community/LFM2.5-1.2B-Instruct-MLX-4bit. 1 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 1,
|
|
"language": "Python",
|
|
"license": "other",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"lfm2",
|
|
"liquid",
|
|
"lfm2.5",
|
|
"edge",
|
|
"mlx",
|
|
"conversational",
|
|
"en",
|
|
"ar",
|
|
"zh",
|
|
"fr",
|
|
"de",
|
|
"ja",
|
|
"ko",
|
|
"es",
|
|
"base_model:LiquidAI/LFM2.5-1.2B-Instruct",
|
|
"base_model:quantized:LiquidAI/LFM2.5-1.2B-Instruct",
|
|
"endpoints_compatible",
|
|
"4-bit",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 1.2,
|
|
"parameters_active_b": 1.2,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "vicuna-7b-v1.5",
|
|
"name": "Vicuna 7B V1.5",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/lmsys/vicuna-7b-v1.5",
|
|
"description": "Open source model lmsys/vicuna-7b-v1.5. 387 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 387,
|
|
"language": "Python",
|
|
"license": "llama2",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"pytorch",
|
|
"llama",
|
|
"arxiv:2307.09288",
|
|
"arxiv:2306.05685",
|
|
"text-generation-inference",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 5,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 7,
|
|
"parameters_active_b": 7,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "llama-3.2-1b-instruct-q8_0-gguf",
|
|
"name": "Llama 3.2 1B Instruct Q8_0 Gguf",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/hugging-quants/Llama-3.2-1B-Instruct-Q8_0-GGUF",
|
|
"description": "Open source model hugging-quants/Llama-3.2-1B-Instruct-Q8_0-GGUF. 43 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 43,
|
|
"language": "Python",
|
|
"license": "unknown",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"gguf",
|
|
"facebook",
|
|
"meta",
|
|
"pytorch",
|
|
"llama",
|
|
"llama-3",
|
|
"llama-cpp",
|
|
"gguf-my-repo",
|
|
"en",
|
|
"de",
|
|
"fr",
|
|
"it",
|
|
"pt",
|
|
"hi",
|
|
"es",
|
|
"th",
|
|
"base_model:meta-llama/Llama-3.2-1B-Instruct",
|
|
"base_model:quantized:meta-llama/Llama-3.2-1B-Instruct",
|
|
"endpoints_compatible",
|
|
"region:us",
|
|
"conversational"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 1,
|
|
"parameters_active_b": 1,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "llama-3.3-70b-instruct-awq",
|
|
"name": "Llama 3.3 70B Instruct Awq",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/kosbu/Llama-3.3-70B-Instruct-AWQ",
|
|
"description": "Open source model kosbu/Llama-3.3-70B-Instruct-AWQ. 10 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 10,
|
|
"language": "Python",
|
|
"license": "llama3.3",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llama",
|
|
"facebook",
|
|
"meta",
|
|
"llama-3",
|
|
"awq",
|
|
"conversational",
|
|
"en",
|
|
"fr",
|
|
"it",
|
|
"pt",
|
|
"hi",
|
|
"es",
|
|
"th",
|
|
"de",
|
|
"base_model:meta-llama/Llama-3.3-70B-Instruct",
|
|
"base_model:quantized:meta-llama/Llama-3.3-70B-Instruct",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"4-bit",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 49,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 70,
|
|
"parameters_active_b": 70,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-32b-fp8",
|
|
"name": "Qwen3 32B Fp8",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-32B-FP8",
|
|
"description": "Open source model Qwen/Qwen3-32B-FP8. 80 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 80,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3",
|
|
"conversational",
|
|
"arxiv:2309.00071",
|
|
"arxiv:2505.09388",
|
|
"base_model:Qwen/Qwen3-32B",
|
|
"base_model:quantized:Qwen/Qwen3-32B",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"fp8",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 22,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 32,
|
|
"parameters_active_b": 32,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "gpt2-xl",
|
|
"name": "Gpt2 Xl",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/openai-community/gpt2-xl",
|
|
"description": "Open source model openai-community/gpt2-xl. 373 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 373,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"pytorch",
|
|
"tf",
|
|
"jax",
|
|
"rust",
|
|
"safetensors",
|
|
"gpt2",
|
|
"en",
|
|
"arxiv:1910.09700",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-4b-instruct-2507-fp8",
|
|
"name": "Qwen3 4B Instruct 2507 Fp8",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507-FP8",
|
|
"description": "Open source model Qwen/Qwen3-4B-Instruct-2507-FP8. 65 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 65,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3",
|
|
"conversational",
|
|
"arxiv:2505.09388",
|
|
"base_model:Qwen/Qwen3-4B-Instruct-2507",
|
|
"base_model:quantized:Qwen/Qwen3-4B-Instruct-2507",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"fp8",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 3,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 4,
|
|
"parameters_active_b": 4,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "xlnet-base-cased",
|
|
"name": "Xlnet Base Cased",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/xlnet/xlnet-base-cased",
|
|
"description": "Open source model xlnet/xlnet-base-cased. 80 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 80,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"pytorch",
|
|
"tf",
|
|
"rust",
|
|
"xlnet",
|
|
"en",
|
|
"dataset:bookcorpus",
|
|
"dataset:wikipedia",
|
|
"arxiv:1906.08237",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "llama-2-7b-hf",
|
|
"name": "Llama 2 7B Hf",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/meta-llama/Llama-2-7b-hf",
|
|
"description": "Open source model meta-llama/Llama-2-7b-hf. 2268 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 2268,
|
|
"language": "Python",
|
|
"license": "llama2",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"pytorch",
|
|
"safetensors",
|
|
"llama",
|
|
"facebook",
|
|
"meta",
|
|
"llama-2",
|
|
"en",
|
|
"arxiv:2307.09288",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 5,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 7,
|
|
"parameters_active_b": 7,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen2.5-math-7b-instruct",
|
|
"name": "Qwen2.5 Math 7B Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen2.5-Math-7B-Instruct",
|
|
"description": "Open source model Qwen/Qwen2.5-Math-7B-Instruct. 89 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 89,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"chat",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2409.12122",
|
|
"base_model:Qwen/Qwen2.5-Math-7B",
|
|
"base_model:finetune:Qwen/Qwen2.5-Math-7B",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 5,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 7,
|
|
"parameters_active_b": 7,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-reranker-0.6b",
|
|
"name": "Qwen3 Reranker 0.6B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-Reranker-0.6B",
|
|
"description": "Open source model Qwen/Qwen3-Reranker-0.6B. 305 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 305,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3",
|
|
"text-ranking",
|
|
"arxiv:2506.05176",
|
|
"base_model:Qwen/Qwen3-0.6B-Base",
|
|
"base_model:finetune:Qwen/Qwen3-0.6B-Base",
|
|
"text-embeddings-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 4,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 6,
|
|
"parameters_active_b": 6,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen2.5-1.5b",
|
|
"name": "Qwen2.5 1.5B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen2.5-1.5B",
|
|
"description": "Open source model Qwen/Qwen2.5-1.5B. 165 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 165,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2407.10671",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 4,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 5,
|
|
"parameters_active_b": 5,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-30b-a3b-thinking-2507",
|
|
"name": "Qwen3 30B A3B Thinking 2507",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-30B-A3B-Thinking-2507",
|
|
"description": "Open source model Qwen/Qwen3-30B-A3B-Thinking-2507. 359 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 359,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3_moe",
|
|
"conversational",
|
|
"arxiv:2402.17463",
|
|
"arxiv:2407.02490",
|
|
"arxiv:2501.15383",
|
|
"arxiv:2404.06654",
|
|
"arxiv:2505.09388",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 21,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 30,
|
|
"parameters_active_b": 30,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "smollm2-135m-instruct",
|
|
"name": "Smollm2 135M Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/HuggingFaceTB/SmolLM2-135M-Instruct",
|
|
"description": "Open source model HuggingFaceTB/SmolLM2-135M-Instruct. 292 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 292,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"tensorboard",
|
|
"onnx",
|
|
"safetensors",
|
|
"llama",
|
|
"transformers.js",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2502.02737",
|
|
"base_model:HuggingFaceTB/SmolLM2-135M",
|
|
"base_model:quantized:HuggingFaceTB/SmolLM2-135M",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen2.5-math-1.5b",
|
|
"name": "Qwen2.5 Math 1.5B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen2.5-Math-1.5B",
|
|
"description": "Open source model Qwen/Qwen2.5-Math-1.5B. 100 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 100,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2409.12122",
|
|
"base_model:Qwen/Qwen2.5-1.5B",
|
|
"base_model:finetune:Qwen/Qwen2.5-1.5B",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 4,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 5,
|
|
"parameters_active_b": 5,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "glm-4.5-air-awq-4bit",
|
|
"name": "Glm 4.5 Air Awq 4Bit",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/cyankiwi/GLM-4.5-Air-AWQ-4bit",
|
|
"description": "Open source model cyankiwi/GLM-4.5-Air-AWQ-4bit. 27 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 27,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"glm4_moe",
|
|
"conversational",
|
|
"en",
|
|
"zh",
|
|
"arxiv:2508.06471",
|
|
"base_model:zai-org/GLM-4.5-Air",
|
|
"base_model:quantized:zai-org/GLM-4.5-Air",
|
|
"endpoints_compatible",
|
|
"compressed-tensors",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 3,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 4,
|
|
"parameters_active_b": 4,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "llama-2-7b-chat-hf",
|
|
"name": "Llama 2 7B Chat Hf",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/meta-llama/Llama-2-7b-chat-hf",
|
|
"description": "Open source model meta-llama/Llama-2-7b-chat-hf. 4705 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 4705,
|
|
"language": "Python",
|
|
"license": "llama2",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"pytorch",
|
|
"safetensors",
|
|
"llama",
|
|
"facebook",
|
|
"meta",
|
|
"llama-2",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2307.09288",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 5,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 7,
|
|
"parameters_active_b": 7,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen2.5-coder-7b-instruct-gptq-int4",
|
|
"name": "Qwen2.5 Coder 7B Instruct Gptq Int4",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-Int4",
|
|
"description": "Open source model Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-Int4. 12 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 12,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"code",
|
|
"codeqwen",
|
|
"chat",
|
|
"qwen",
|
|
"qwen-coder",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2409.12186",
|
|
"arxiv:2309.00071",
|
|
"arxiv:2407.10671",
|
|
"base_model:Qwen/Qwen2.5-Coder-7B-Instruct",
|
|
"base_model:quantized:Qwen/Qwen2.5-Coder-7B-Instruct",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"4-bit",
|
|
"gptq",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 5,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 7,
|
|
"parameters_active_b": 7,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-vl-30b-a3b-instruct-awq",
|
|
"name": "Qwen3 Vl 30B A3B Instruct Awq",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/QuantTrio/Qwen3-VL-30B-A3B-Instruct-AWQ",
|
|
"description": "Open source model QuantTrio/Qwen3-VL-30B-A3B-Instruct-AWQ. 38 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 38,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3_vl_moe",
|
|
"image-text-to-text",
|
|
"AWQ",
|
|
"vLLM",
|
|
"conversational",
|
|
"arxiv:2505.09388",
|
|
"arxiv:2502.13923",
|
|
"arxiv:2409.12191",
|
|
"arxiv:2308.12966",
|
|
"base_model:Qwen/Qwen3-VL-30B-A3B-Instruct",
|
|
"base_model:quantized:Qwen/Qwen3-VL-30B-A3B-Instruct",
|
|
"endpoints_compatible",
|
|
"4-bit",
|
|
"awq",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 21,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 30,
|
|
"parameters_active_b": 30,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-8b-base",
|
|
"name": "Qwen3 8B Base",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-8B-Base",
|
|
"description": "Open source model Qwen/Qwen3-8B-Base. 82 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 82,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3",
|
|
"conversational",
|
|
"arxiv:2505.09388",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 6,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 8,
|
|
"parameters_active_b": 8,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen2.5-coder-14b-instruct",
|
|
"name": "Qwen2.5 Coder 14B Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct",
|
|
"description": "Open source model Qwen/Qwen2.5-Coder-14B-Instruct. 140 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 140,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"code",
|
|
"codeqwen",
|
|
"chat",
|
|
"qwen",
|
|
"qwen-coder",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2409.12186",
|
|
"arxiv:2309.00071",
|
|
"arxiv:2407.10671",
|
|
"base_model:Qwen/Qwen2.5-Coder-14B",
|
|
"base_model:finetune:Qwen/Qwen2.5-Coder-14B",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "16GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 10,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 14,
|
|
"parameters_active_b": 14,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "stories15m_moe",
|
|
"name": "Stories15M_Moe",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/ggml-org/stories15M_MOE",
|
|
"description": "Open source model ggml-org/stories15M_MOE. 5 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 5,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"gguf",
|
|
"mixtral",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "opt-1.3b",
|
|
"name": "Opt 1.3B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/facebook/opt-1.3b",
|
|
"description": "Open source model facebook/opt-1.3b. 182 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 182,
|
|
"language": "Python",
|
|
"license": "other",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"pytorch",
|
|
"tf",
|
|
"jax",
|
|
"opt",
|
|
"en",
|
|
"arxiv:2205.01068",
|
|
"arxiv:2005.14165",
|
|
"text-generation-inference",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 2,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 3,
|
|
"parameters_active_b": 3,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "minimax-m2-awq",
|
|
"name": "Minimax M2 Awq",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/QuantTrio/MiniMax-M2-AWQ",
|
|
"description": "Open source model QuantTrio/MiniMax-M2-AWQ. 8 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 8,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"mixtral",
|
|
"vLLM",
|
|
"AWQ",
|
|
"conversational",
|
|
"arxiv:2504.07164",
|
|
"arxiv:2509.06501",
|
|
"arxiv:2509.13160",
|
|
"base_model:MiniMaxAI/MiniMax-M2",
|
|
"base_model:quantized:MiniMaxAI/MiniMax-M2",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"4-bit",
|
|
"awq",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "glm-4.7-flash-nvfp4",
|
|
"name": "Glm 4.7 Flash Nvfp4",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/GadflyII/GLM-4.7-Flash-NVFP4",
|
|
"description": "Open source model GadflyII/GLM-4.7-Flash-NVFP4. 62 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 62,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"glm4_moe_lite",
|
|
"moe",
|
|
"nvfp4",
|
|
"quantized",
|
|
"vllm",
|
|
"glm",
|
|
"30b",
|
|
"conversational",
|
|
"en",
|
|
"zh",
|
|
"base_model:zai-org/GLM-4.7-Flash",
|
|
"base_model:quantized:zai-org/GLM-4.7-Flash",
|
|
"endpoints_compatible",
|
|
"compressed-tensors",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "hy-mt1.5-7b",
|
|
"name": "Hy Mt1.5 7B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/tencent/HY-MT1.5-7B",
|
|
"description": "Open source model tencent/HY-MT1.5-7B. 133 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 133,
|
|
"language": "Python",
|
|
"license": "unknown",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"hunyuan_v1_dense",
|
|
"translation",
|
|
"zh",
|
|
"en",
|
|
"fr",
|
|
"pt",
|
|
"es",
|
|
"ja",
|
|
"tr",
|
|
"ru",
|
|
"ar",
|
|
"ko",
|
|
"th",
|
|
"it",
|
|
"de",
|
|
"vi",
|
|
"ms",
|
|
"id",
|
|
"tl",
|
|
"hi",
|
|
"pl",
|
|
"cs",
|
|
"nl",
|
|
"km",
|
|
"my",
|
|
"fa",
|
|
"gu",
|
|
"ur",
|
|
"te",
|
|
"mr",
|
|
"he",
|
|
"bn",
|
|
"ta",
|
|
"uk",
|
|
"bo",
|
|
"kk",
|
|
"mn",
|
|
"ug",
|
|
"arxiv:2512.24092",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 5,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 7,
|
|
"parameters_active_b": 7,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "gemma-2-27b-it",
|
|
"name": "Gemma 2 27B It",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/google/gemma-2-27b-it",
|
|
"description": "Open source model google/gemma-2-27b-it. 559 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 559,
|
|
"language": "Python",
|
|
"license": "gemma",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"gemma2",
|
|
"conversational",
|
|
"arxiv:2009.03300",
|
|
"arxiv:1905.07830",
|
|
"arxiv:1911.11641",
|
|
"arxiv:1904.09728",
|
|
"arxiv:1905.10044",
|
|
"arxiv:1907.10641",
|
|
"arxiv:1811.00937",
|
|
"arxiv:1809.02789",
|
|
"arxiv:1911.01547",
|
|
"arxiv:1705.03551",
|
|
"arxiv:2107.03374",
|
|
"arxiv:2108.07732",
|
|
"arxiv:2110.14168",
|
|
"arxiv:2009.11462",
|
|
"arxiv:2101.11718",
|
|
"arxiv:2110.08193",
|
|
"arxiv:1804.09301",
|
|
"arxiv:2109.07958",
|
|
"arxiv:1804.06876",
|
|
"arxiv:2103.03874",
|
|
"arxiv:2304.06364",
|
|
"arxiv:2206.04615",
|
|
"arxiv:2203.09509",
|
|
"base_model:google/gemma-2-27b",
|
|
"base_model:finetune:google/gemma-2-27b",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 19,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 27,
|
|
"parameters_active_b": 27,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-coder-next-gguf",
|
|
"name": "Qwen3 Coder Next Gguf",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/unsloth/Qwen3-Coder-Next-GGUF",
|
|
"description": "Open source model unsloth/Qwen3-Coder-Next-GGUF. 347 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 347,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"gguf",
|
|
"qwen3_next",
|
|
"unsloth",
|
|
"qwen",
|
|
"qwen3",
|
|
"base_model:Qwen/Qwen3-Coder-Next",
|
|
"base_model:quantized:Qwen/Qwen3-Coder-Next",
|
|
"endpoints_compatible",
|
|
"region:us",
|
|
"imatrix",
|
|
"conversational"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "gte-qwen2-1.5b-instruct",
|
|
"name": "Gte Qwen2 1.5B Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct",
|
|
"description": "Open source model Alibaba-NLP/gte-Qwen2-1.5B-instruct. 229 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 229,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"sentence-transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"mteb",
|
|
"transformers",
|
|
"Qwen2",
|
|
"sentence-similarity",
|
|
"custom_code",
|
|
"arxiv:2308.03281",
|
|
"model-index",
|
|
"text-embeddings-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 4,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 5,
|
|
"parameters_active_b": 5,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "lfm2-1.2b",
|
|
"name": "Lfm2 1.2B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/LiquidAI/LFM2-1.2B",
|
|
"description": "Open source model LiquidAI/LFM2-1.2B. 349 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 349,
|
|
"language": "Python",
|
|
"license": "other",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"lfm2",
|
|
"liquid",
|
|
"edge",
|
|
"conversational",
|
|
"en",
|
|
"ar",
|
|
"zh",
|
|
"fr",
|
|
"de",
|
|
"ja",
|
|
"ko",
|
|
"es",
|
|
"arxiv:2511.23404",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 2,
|
|
"parameters_active_b": 2,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "saiga_llama3_8b",
|
|
"name": "Saiga_Llama3_8B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/IlyaGusev/saiga_llama3_8b",
|
|
"description": "Open source model IlyaGusev/saiga_llama3_8b. 137 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 137,
|
|
"language": "Python",
|
|
"license": "other",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llama",
|
|
"conversational",
|
|
"ru",
|
|
"dataset:IlyaGusev/saiga_scored",
|
|
"doi:10.57967/hf/2368",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 6,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 8,
|
|
"parameters_active_b": 8,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-1.7b-base",
|
|
"name": "Qwen3 1.7B Base",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-1.7B-Base",
|
|
"description": "Open source model Qwen/Qwen3-1.7B-Base. 62 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 62,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3",
|
|
"conversational",
|
|
"arxiv:2505.09388",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 5,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 7,
|
|
"parameters_active_b": 7,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "mistral-7b-v0.3-bnb-4bit",
|
|
"name": "Mistral 7B V0.3 Bnb 4Bit",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/unsloth/mistral-7b-v0.3-bnb-4bit",
|
|
"description": "Open source model unsloth/mistral-7b-v0.3-bnb-4bit. 22 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 22,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"mistral",
|
|
"unsloth",
|
|
"mistral-7b",
|
|
"en",
|
|
"base_model:mistralai/Mistral-7B-v0.3",
|
|
"base_model:quantized:mistralai/Mistral-7B-v0.3",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"4-bit",
|
|
"bitsandbytes",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 5,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 7,
|
|
"parameters_active_b": 7,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "gemma-2-2b-it",
|
|
"name": "Gemma 2 2B It",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/google/gemma-2-2b-it",
|
|
"description": "Open source model google/gemma-2-2b-it. 1285 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 1285,
|
|
"language": "Python",
|
|
"license": "gemma",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"gemma2",
|
|
"conversational",
|
|
"arxiv:2009.03300",
|
|
"arxiv:1905.07830",
|
|
"arxiv:1911.11641",
|
|
"arxiv:1904.09728",
|
|
"arxiv:1905.10044",
|
|
"arxiv:1907.10641",
|
|
"arxiv:1811.00937",
|
|
"arxiv:1809.02789",
|
|
"arxiv:1911.01547",
|
|
"arxiv:1705.03551",
|
|
"arxiv:2107.03374",
|
|
"arxiv:2108.07732",
|
|
"arxiv:2110.14168",
|
|
"arxiv:2009.11462",
|
|
"arxiv:2101.11718",
|
|
"arxiv:2110.08193",
|
|
"arxiv:1804.09301",
|
|
"arxiv:2109.07958",
|
|
"arxiv:1804.06876",
|
|
"arxiv:2103.03874",
|
|
"arxiv:2304.06364",
|
|
"arxiv:1903.00161",
|
|
"arxiv:2206.04615",
|
|
"arxiv:2203.09509",
|
|
"arxiv:2403.13793",
|
|
"base_model:google/gemma-2-2b",
|
|
"base_model:finetune:google/gemma-2-2b",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 2,
|
|
"parameters_active_b": 2,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "phi-4-multimodal-instruct",
|
|
"name": "Phi 4 Multimodal Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/microsoft/Phi-4-multimodal-instruct",
|
|
"description": "Open source model microsoft/Phi-4-multimodal-instruct. 1573 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 1573,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"phi4mm",
|
|
"nlp",
|
|
"code",
|
|
"audio",
|
|
"automatic-speech-recognition",
|
|
"speech-summarization",
|
|
"speech-translation",
|
|
"visual-question-answering",
|
|
"phi-4-multimodal",
|
|
"phi",
|
|
"phi-4-mini",
|
|
"custom_code",
|
|
"multilingual",
|
|
"ar",
|
|
"zh",
|
|
"cs",
|
|
"da",
|
|
"nl",
|
|
"en",
|
|
"fi",
|
|
"fr",
|
|
"de",
|
|
"he",
|
|
"hu",
|
|
"it",
|
|
"ja",
|
|
"ko",
|
|
"no",
|
|
"pl",
|
|
"pt",
|
|
"ru",
|
|
"es",
|
|
"sv",
|
|
"th",
|
|
"tr",
|
|
"uk",
|
|
"arxiv:2503.01743",
|
|
"arxiv:2407.13833",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "pythia-70m-deduped",
|
|
"name": "Pythia 70M Deduped",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/EleutherAI/pythia-70m-deduped",
|
|
"description": "Open source model EleutherAI/pythia-70m-deduped. 27 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 27,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"pytorch",
|
|
"safetensors",
|
|
"gpt_neox",
|
|
"causal-lm",
|
|
"pythia",
|
|
"en",
|
|
"dataset:EleutherAI/the_pile_deduplicated",
|
|
"arxiv:2304.01373",
|
|
"arxiv:2101.00027",
|
|
"arxiv:2201.07311",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "dialogpt-medium",
|
|
"name": "Dialogpt Medium",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/microsoft/DialoGPT-medium",
|
|
"description": "Open source model microsoft/DialoGPT-medium. 433 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 433,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"pytorch",
|
|
"tf",
|
|
"jax",
|
|
"rust",
|
|
"gpt2",
|
|
"conversational",
|
|
"arxiv:1911.00536",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "gpt-oss-20b-bf16",
|
|
"name": "Gpt Oss 20B Bf16",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/unsloth/gpt-oss-20b-BF16",
|
|
"description": "Open source model unsloth/gpt-oss-20b-BF16. 29 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 29,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"gpt_oss",
|
|
"vllm",
|
|
"unsloth",
|
|
"conversational",
|
|
"base_model:openai/gpt-oss-20b",
|
|
"base_model:finetune:openai/gpt-oss-20b",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "16GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 14,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 20,
|
|
"parameters_active_b": 20,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen2.5-72b-instruct",
|
|
"name": "Qwen2.5 72B Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
|
|
"description": "Open source model Qwen/Qwen2.5-72B-Instruct. 910 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 910,
|
|
"language": "Python",
|
|
"license": "other",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"chat",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2309.00071",
|
|
"arxiv:2407.10671",
|
|
"base_model:Qwen/Qwen2.5-72B",
|
|
"base_model:finetune:Qwen/Qwen2.5-72B",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 50,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 72,
|
|
"parameters_active_b": 72,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-32b-awq",
|
|
"name": "Qwen3 32B Awq",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-32B-AWQ",
|
|
"description": "Open source model Qwen/Qwen3-32B-AWQ. 125 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 125,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3",
|
|
"conversational",
|
|
"arxiv:2309.00071",
|
|
"arxiv:2505.09388",
|
|
"base_model:Qwen/Qwen3-32B",
|
|
"base_model:quantized:Qwen/Qwen3-32B",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"4-bit",
|
|
"awq",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 22,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 32,
|
|
"parameters_active_b": 32,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "mimo-v2-flash",
|
|
"name": "Mimo V2 Flash",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/XiaomiMiMo/MiMo-V2-Flash",
|
|
"description": "Open source model XiaomiMiMo/MiMo-V2-Flash. 628 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 628,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"mimo_v2_flash",
|
|
"conversational",
|
|
"custom_code",
|
|
"eval-results",
|
|
"fp8",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-coder-30b-a3b-instruct-fp8",
|
|
"name": "Qwen3 Coder 30B A3B Instruct Fp8",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8",
|
|
"description": "Open source model Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8. 158 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 158,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3_moe",
|
|
"conversational",
|
|
"arxiv:2505.09388",
|
|
"endpoints_compatible",
|
|
"fp8",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 21,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 30,
|
|
"parameters_active_b": 3,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-8b-fp8",
|
|
"name": "Qwen3 8B Fp8",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-8B-FP8",
|
|
"description": "Open source model Qwen/Qwen3-8B-FP8. 56 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 56,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3",
|
|
"conversational",
|
|
"arxiv:2309.00071",
|
|
"arxiv:2505.09388",
|
|
"base_model:Qwen/Qwen3-8B",
|
|
"base_model:quantized:Qwen/Qwen3-8B",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"fp8",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 6,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 8,
|
|
"parameters_active_b": 8,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "deepseek-v3.2",
|
|
"name": "Deepseek V3.2",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/deepseek-ai/DeepSeek-V3.2",
|
|
"description": "Open source model deepseek-ai/DeepSeek-V3.2. 1251 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 1251,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"deepseek_v32",
|
|
"conversational",
|
|
"base_model:deepseek-ai/DeepSeek-V3.2-Exp-Base",
|
|
"base_model:finetune:deepseek-ai/DeepSeek-V3.2-Exp-Base",
|
|
"eval-results",
|
|
"endpoints_compatible",
|
|
"fp8",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-coder-next",
|
|
"name": "Qwen3 Coder Next",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-Coder-Next",
|
|
"description": "Open source model Qwen/Qwen3-Coder-Next. 912 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 912,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3_next",
|
|
"conversational",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen2-0.5b",
|
|
"name": "Qwen2 0.5B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen2-0.5B",
|
|
"description": "Open source model Qwen/Qwen2-0.5B. 164 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 164,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"pretrained",
|
|
"conversational",
|
|
"en",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "mistral-7b-v0.1",
|
|
"name": "Mistral 7B V0.1",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/mistralai/Mistral-7B-v0.1",
|
|
"description": "Open source model mistralai/Mistral-7B-v0.1. 4042 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 4042,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"pytorch",
|
|
"safetensors",
|
|
"mistral",
|
|
"pretrained",
|
|
"mistral-common",
|
|
"en",
|
|
"arxiv:2310.06825",
|
|
"text-generation-inference",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 5,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 7,
|
|
"parameters_active_b": 7,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "kimi-k2-thinking",
|
|
"name": "Kimi K2 Thinking",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/moonshotai/Kimi-K2-Thinking",
|
|
"description": "Open source model moonshotai/Kimi-K2-Thinking. 1670 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 1670,
|
|
"language": "Python",
|
|
"license": "other",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"kimi_k2",
|
|
"conversational",
|
|
"custom_code",
|
|
"eval-results",
|
|
"endpoints_compatible",
|
|
"compressed-tensors",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "deepseek-r1-0528-qwen3-8b-mlx-4bit",
|
|
"name": "Deepseek R1 0528 Qwen3 8B Mlx 4Bit",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/lmstudio-community/DeepSeek-R1-0528-Qwen3-8B-MLX-4bit",
|
|
"description": "Open source model lmstudio-community/DeepSeek-R1-0528-Qwen3-8B-MLX-4bit. 7 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 7,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"mlx",
|
|
"safetensors",
|
|
"qwen3",
|
|
"conversational",
|
|
"base_model:deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
|
|
"base_model:quantized:deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
|
|
"4-bit",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 6,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 8,
|
|
"parameters_active_b": 8,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen2.5-7b-instruct-awq",
|
|
"name": "Qwen2.5 7B Instruct Awq",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct-AWQ",
|
|
"description": "Open source model Qwen/Qwen2.5-7B-Instruct-AWQ. 36 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 36,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"chat",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2309.00071",
|
|
"arxiv:2407.10671",
|
|
"base_model:Qwen/Qwen2.5-7B-Instruct",
|
|
"base_model:quantized:Qwen/Qwen2.5-7B-Instruct",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"4-bit",
|
|
"awq",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 5,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 7,
|
|
"parameters_active_b": 7,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "points-reader",
|
|
"name": "Points Reader",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/tencent/POINTS-Reader",
|
|
"description": "Open source model tencent/POINTS-Reader. 100 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 100,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"image-text-to-text",
|
|
"conversational",
|
|
"custom_code",
|
|
"arxiv:2509.01215",
|
|
"arxiv:2412.08443",
|
|
"arxiv:2409.04828",
|
|
"arxiv:2405.11850",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": true
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-4b-base",
|
|
"name": "Qwen3 4B Base",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-4B-Base",
|
|
"description": "Open source model Qwen/Qwen3-4B-Base. 80 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 80,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3",
|
|
"conversational",
|
|
"arxiv:2505.09388",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 3,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 4,
|
|
"parameters_active_b": 4,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "step-3.5-flash",
|
|
"name": "Step 3.5 Flash",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/stepfun-ai/Step-3.5-Flash",
|
|
"description": "Open source model stepfun-ai/Step-3.5-Flash. 621 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 621,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"step3p5",
|
|
"conversational",
|
|
"custom_code",
|
|
"arxiv:2602.10604",
|
|
"arxiv:2601.05593",
|
|
"arxiv:2507.19427",
|
|
"eval-results",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "kogpt2-base-v2",
|
|
"name": "Kogpt2 Base V2",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/skt/kogpt2-base-v2",
|
|
"description": "Open source model skt/kogpt2-base-v2. 60 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 60,
|
|
"language": "Python",
|
|
"license": "cc-by-nc-sa-4.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"pytorch",
|
|
"jax",
|
|
"gpt2",
|
|
"ko",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "parler-tts-mini-multilingual-v1.1",
|
|
"name": "Parler Tts Mini Multilingual V1.1",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/parler-tts/parler-tts-mini-multilingual-v1.1",
|
|
"description": "Open source model parler-tts/parler-tts-mini-multilingual-v1.1. 54 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 54,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"parler_tts",
|
|
"text-to-speech",
|
|
"annotation",
|
|
"en",
|
|
"fr",
|
|
"es",
|
|
"pt",
|
|
"pl",
|
|
"de",
|
|
"nl",
|
|
"it",
|
|
"dataset:facebook/multilingual_librispeech",
|
|
"dataset:parler-tts/libritts_r_filtered",
|
|
"dataset:parler-tts/libritts-r-filtered-speaker-descriptions",
|
|
"dataset:parler-tts/mls_eng",
|
|
"dataset:parler-tts/mls-eng-speaker-descriptions",
|
|
"dataset:ylacombe/mls-annotated",
|
|
"dataset:ylacombe/cml-tts-filtered-annotated",
|
|
"dataset:PHBJT/cml-tts-filtered",
|
|
"arxiv:2402.01912",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-reranker-8b",
|
|
"name": "Qwen3 Reranker 8B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-Reranker-8B",
|
|
"description": "Open source model Qwen/Qwen3-Reranker-8B. 213 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 213,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3",
|
|
"text-ranking",
|
|
"arxiv:2506.05176",
|
|
"base_model:Qwen/Qwen3-8B-Base",
|
|
"base_model:finetune:Qwen/Qwen3-8B-Base",
|
|
"text-embeddings-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 6,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 8,
|
|
"parameters_active_b": 8,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "deepseek-r1-0528-qwen3-8b-mlx-8bit",
|
|
"name": "Deepseek R1 0528 Qwen3 8B Mlx 8Bit",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/lmstudio-community/DeepSeek-R1-0528-Qwen3-8B-MLX-8bit",
|
|
"description": "Open source model lmstudio-community/DeepSeek-R1-0528-Qwen3-8B-MLX-8bit. 13 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 13,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"mlx",
|
|
"safetensors",
|
|
"qwen3",
|
|
"conversational",
|
|
"base_model:deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
|
|
"base_model:quantized:deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
|
|
"8-bit",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 6,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 8,
|
|
"parameters_active_b": 8,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "powermoe-3b",
|
|
"name": "Powermoe 3B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/ibm-research/PowerMoE-3b",
|
|
"description": "Open source model ibm-research/PowerMoE-3b. 14 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 14,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"granitemoe",
|
|
"arxiv:2408.13359",
|
|
"model-index",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 2,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 3,
|
|
"parameters_active_b": 3,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "llada-8b-instruct",
|
|
"name": "Llada 8B Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/GSAI-ML/LLaDA-8B-Instruct",
|
|
"description": "Open source model GSAI-ML/LLaDA-8B-Instruct. 342 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 342,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llada",
|
|
"conversational",
|
|
"custom_code",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 6,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 8,
|
|
"parameters_active_b": 8,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "apertus-8b-instruct-2509",
|
|
"name": "Apertus 8B Instruct 2509",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/swiss-ai/Apertus-8B-Instruct-2509",
|
|
"description": "Open source model swiss-ai/Apertus-8B-Instruct-2509. 435 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 435,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"apertus",
|
|
"multilingual",
|
|
"compliant",
|
|
"swiss-ai",
|
|
"conversational",
|
|
"arxiv:2509.14233",
|
|
"base_model:swiss-ai/Apertus-8B-2509",
|
|
"base_model:finetune:swiss-ai/Apertus-8B-2509",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 6,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 8,
|
|
"parameters_active_b": 8,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-30b-a3b-gptq-int4",
|
|
"name": "Qwen3 30B A3B Gptq Int4",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-30B-A3B-GPTQ-Int4",
|
|
"description": "Open source model Qwen/Qwen3-30B-A3B-GPTQ-Int4. 45 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 45,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3_moe",
|
|
"conversational",
|
|
"arxiv:2309.00071",
|
|
"arxiv:2505.09388",
|
|
"base_model:Qwen/Qwen3-30B-A3B",
|
|
"base_model:quantized:Qwen/Qwen3-30B-A3B",
|
|
"endpoints_compatible",
|
|
"4-bit",
|
|
"gptq",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 21,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 30,
|
|
"parameters_active_b": 3,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "tinyllama-1.1b-chat-v0.3-gptq",
|
|
"name": "Tinyllama 1.1B Chat V0.3 Gptq",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GPTQ",
|
|
"description": "Open source model TheBloke/TinyLlama-1.1B-Chat-v0.3-GPTQ. 9 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 9,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llama",
|
|
"en",
|
|
"dataset:cerebras/SlimPajama-627B",
|
|
"dataset:bigcode/starcoderdata",
|
|
"dataset:OpenAssistant/oasst_top1_2023-08-25",
|
|
"base_model:TinyLlama/TinyLlama-1.1B-Chat-v0.3",
|
|
"base_model:quantized:TinyLlama/TinyLlama-1.1B-Chat-v0.3",
|
|
"text-generation-inference",
|
|
"4-bit",
|
|
"gptq",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 1,
|
|
"parameters_active_b": 1,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "prot_t5_xl_bfd",
|
|
"name": "Prot_T5_Xl_Bfd",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Rostlab/prot_t5_xl_bfd",
|
|
"description": "Open source model Rostlab/prot_t5_xl_bfd. 10 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 10,
|
|
"language": "Python",
|
|
"license": "unknown",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"pytorch",
|
|
"tf",
|
|
"t5",
|
|
"protein language model",
|
|
"dataset:BFD",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-4b-instruct-2507-unsloth-bnb-4bit",
|
|
"name": "Qwen3 4B Instruct 2507 Unsloth Bnb 4Bit",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/unsloth/Qwen3-4B-Instruct-2507-unsloth-bnb-4bit",
|
|
"description": "Open source model unsloth/Qwen3-4B-Instruct-2507-unsloth-bnb-4bit. 13 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 13,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3",
|
|
"unsloth",
|
|
"conversational",
|
|
"arxiv:2505.09388",
|
|
"base_model:Qwen/Qwen3-4B-Instruct-2507",
|
|
"base_model:quantized:Qwen/Qwen3-4B-Instruct-2507",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"4-bit",
|
|
"bitsandbytes",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 3,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 4,
|
|
"parameters_active_b": 4,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "phi-3.5-mini-instruct",
|
|
"name": "Phi 3.5 Mini Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/microsoft/Phi-3.5-mini-instruct",
|
|
"description": "Open source model microsoft/Phi-3.5-mini-instruct. 963 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 963,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"phi3",
|
|
"nlp",
|
|
"code",
|
|
"conversational",
|
|
"custom_code",
|
|
"multilingual",
|
|
"arxiv:2404.14219",
|
|
"arxiv:2407.13833",
|
|
"arxiv:2403.06412",
|
|
"eval-results",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "meta-llama-3.1-8b-instruct-bnb-4bit",
|
|
"name": "Meta Llama 3.1 8B Instruct Bnb 4Bit",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
|
|
"description": "Open source model unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit. 95 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 95,
|
|
"language": "Python",
|
|
"license": "llama3.1",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llama",
|
|
"llama-3",
|
|
"meta",
|
|
"facebook",
|
|
"unsloth",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2204.05149",
|
|
"base_model:meta-llama/Llama-3.1-8B-Instruct",
|
|
"base_model:quantized:meta-llama/Llama-3.1-8B-Instruct",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"4-bit",
|
|
"bitsandbytes",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 6,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 8,
|
|
"parameters_active_b": 8,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "glm-4.7-flash-awq-4bit",
|
|
"name": "Glm 4.7 Flash Awq 4Bit",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/cyankiwi/GLM-4.7-Flash-AWQ-4bit",
|
|
"description": "Open source model cyankiwi/GLM-4.7-Flash-AWQ-4bit. 43 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 43,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"glm4_moe_lite",
|
|
"conversational",
|
|
"en",
|
|
"zh",
|
|
"arxiv:2508.06471",
|
|
"base_model:zai-org/GLM-4.7-Flash",
|
|
"base_model:quantized:zai-org/GLM-4.7-Flash",
|
|
"endpoints_compatible",
|
|
"compressed-tensors",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 3,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 4,
|
|
"parameters_active_b": 4,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "dots.ocr",
|
|
"name": "Dots.Ocr",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/rednote-hilab/dots.ocr",
|
|
"description": "Open source model rednote-hilab/dots.ocr. 1243 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 1243,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"dots_ocr",
|
|
"safetensors",
|
|
"image-to-text",
|
|
"ocr",
|
|
"document-parse",
|
|
"layout",
|
|
"table",
|
|
"formula",
|
|
"transformers",
|
|
"custom_code",
|
|
"image-text-to-text",
|
|
"conversational",
|
|
"en",
|
|
"zh",
|
|
"multilingual",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "mistral-7b-bnb-4bit",
|
|
"name": "Mistral 7B Bnb 4Bit",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/unsloth/mistral-7b-bnb-4bit",
|
|
"description": "Open source model unsloth/mistral-7b-bnb-4bit. 30 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 30,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"mistral",
|
|
"unsloth",
|
|
"mistral-7b",
|
|
"bnb",
|
|
"en",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"4-bit",
|
|
"bitsandbytes",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 5,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 7,
|
|
"parameters_active_b": 7,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "glm-5-fp8",
|
|
"name": "Glm 5 Fp8",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/zai-org/GLM-5-FP8",
|
|
"description": "Open source model zai-org/GLM-5-FP8. 108 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 108,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"glm_moe_dsa",
|
|
"conversational",
|
|
"en",
|
|
"zh",
|
|
"endpoints_compatible",
|
|
"fp8",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen-7b",
|
|
"name": "Qwen 7B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen-7B",
|
|
"description": "Open source model Qwen/Qwen-7B. 395 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 395,
|
|
"language": "Python",
|
|
"license": "other",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen",
|
|
"custom_code",
|
|
"zh",
|
|
"en",
|
|
"arxiv:2309.16609",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 5,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 7,
|
|
"parameters_active_b": 7,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwq-32b-awq",
|
|
"name": "Qwq 32B Awq",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/QwQ-32B-AWQ",
|
|
"description": "Open source model Qwen/QwQ-32B-AWQ. 133 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 133,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"safetensors",
|
|
"qwen2",
|
|
"chat",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2309.00071",
|
|
"arxiv:2412.15115",
|
|
"base_model:Qwen/QwQ-32B",
|
|
"base_model:quantized:Qwen/QwQ-32B",
|
|
"4-bit",
|
|
"awq",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 22,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 32,
|
|
"parameters_active_b": 32,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "deepseek-r1-distill-llama-70b",
|
|
"name": "Deepseek R1 Distill Llama 70B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
|
|
"description": "Open source model deepseek-ai/DeepSeek-R1-Distill-Llama-70B. 741 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 741,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llama",
|
|
"conversational",
|
|
"arxiv:2501.12948",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 49,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 70,
|
|
"parameters_active_b": 70,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen2.5-coder-7b",
|
|
"name": "Qwen2.5 Coder 7B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B",
|
|
"description": "Open source model Qwen/Qwen2.5-Coder-7B. 134 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 134,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen2",
|
|
"code",
|
|
"qwen",
|
|
"qwen-coder",
|
|
"codeqwen",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2409.12186",
|
|
"arxiv:2309.00071",
|
|
"arxiv:2407.10671",
|
|
"base_model:Qwen/Qwen2.5-7B",
|
|
"base_model:finetune:Qwen/Qwen2.5-7B",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 5,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 7,
|
|
"parameters_active_b": 7,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen2.5-3b",
|
|
"name": "Qwen2.5 3B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen2.5-3B",
|
|
"description": "Open source model Qwen/Qwen2.5-3B. 169 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 169,
|
|
"language": "Python",
|
|
"license": "other",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"safetensors",
|
|
"qwen2",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2407.10671",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 2,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 3,
|
|
"parameters_active_b": 3,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "deepseek-v2-lite-chat",
|
|
"name": "Deepseek V2 Lite Chat",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/deepseek-ai/DeepSeek-V2-Lite-Chat",
|
|
"description": "Open source model deepseek-ai/DeepSeek-V2-Lite-Chat. 133 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 133,
|
|
"language": "Python",
|
|
"license": "other",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"deepseek_v2",
|
|
"conversational",
|
|
"custom_code",
|
|
"arxiv:2405.04434",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "tiny-qwen3forcausallm",
|
|
"name": "Tiny Qwen3Forcausallm",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/trl-internal-testing/tiny-Qwen3ForCausalLM",
|
|
"description": "Open source model trl-internal-testing/tiny-Qwen3ForCausalLM. 1 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 1,
|
|
"language": "Python",
|
|
"license": "unknown",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3",
|
|
"trl",
|
|
"conversational",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "deepseek-coder-v2-lite-instruct",
|
|
"name": "Deepseek Coder V2 Lite Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
|
|
"description": "Open source model deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct. 539 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 539,
|
|
"language": "Python",
|
|
"license": "other",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"deepseek_v2",
|
|
"conversational",
|
|
"custom_code",
|
|
"arxiv:2401.06066",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen3-0.6b-base",
|
|
"name": "Qwen3 0.6B Base",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen3-0.6B-Base",
|
|
"description": "Open source model Qwen/Qwen3-0.6B-Base. 146 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 146,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen3",
|
|
"conversational",
|
|
"arxiv:2505.09388",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 4,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 6,
|
|
"parameters_active_b": 6,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "diffractgpt_mistral_chemical_formula",
|
|
"name": "Diffractgpt_Mistral_Chemical_Formula",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/knc6/diffractgpt_mistral_chemical_formula",
|
|
"description": "Open source model knc6/diffractgpt_mistral_chemical_formula. 1 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 1,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"peft",
|
|
"safetensors",
|
|
"chemistry",
|
|
"text-generation-inference",
|
|
"atomgpt",
|
|
"diffraction",
|
|
"en",
|
|
"base_model:unsloth/mistral-7b-bnb-4bit",
|
|
"base_model:adapter:unsloth/mistral-7b-bnb-4bit",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "qwen-7b-chat",
|
|
"name": "Qwen 7B Chat",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Qwen/Qwen-7B-Chat",
|
|
"description": "Open source model Qwen/Qwen-7B-Chat. 787 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 787,
|
|
"language": "Python",
|
|
"license": "other",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"qwen",
|
|
"custom_code",
|
|
"zh",
|
|
"en",
|
|
"arxiv:2309.16609",
|
|
"arxiv:2305.08322",
|
|
"arxiv:2009.03300",
|
|
"arxiv:2305.05280",
|
|
"arxiv:2210.03629",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 5,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 7,
|
|
"parameters_active_b": 7,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "nvidia-nemotron-3-nano-30b-a3b-nvfp4",
|
|
"name": "Nvidia Nemotron 3 Nano 30B A3B Nvfp4",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4",
|
|
"description": "Open source model nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4. 100 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 100,
|
|
"language": "Python",
|
|
"license": "other",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"nemotron_h",
|
|
"feature-extraction",
|
|
"nvidia",
|
|
"pytorch",
|
|
"conversational",
|
|
"custom_code",
|
|
"en",
|
|
"es",
|
|
"fr",
|
|
"de",
|
|
"ja",
|
|
"it",
|
|
"dataset:nvidia/Nemotron-Pretraining-Code-v1",
|
|
"dataset:nvidia/Nemotron-CC-v2",
|
|
"dataset:nvidia/Nemotron-Pretraining-SFT-v1",
|
|
"dataset:nvidia/Nemotron-CC-Math-v1",
|
|
"dataset:nvidia/Nemotron-Pretraining-Code-v2",
|
|
"dataset:nvidia/Nemotron-Pretraining-Specialized-v1",
|
|
"dataset:nvidia/Nemotron-CC-v2.1",
|
|
"dataset:nvidia/Nemotron-CC-Code-v1",
|
|
"dataset:nvidia/Nemotron-Pretraining-Dataset-sample",
|
|
"dataset:nvidia/Nemotron-Competitive-Programming-v1",
|
|
"dataset:nvidia/Nemotron-Math-v2",
|
|
"dataset:nvidia/Nemotron-Agentic-v1",
|
|
"dataset:nvidia/Nemotron-Math-Proofs-v1",
|
|
"dataset:nvidia/Nemotron-Instruction-Following-Chat-v1",
|
|
"dataset:nvidia/Nemotron-Science-v1",
|
|
"dataset:nvidia/Nemotron-3-Nano-RL-Training-Blend",
|
|
"arxiv:2512.20848",
|
|
"arxiv:2512.20856",
|
|
"arxiv:2601.20088",
|
|
"base_model:nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
|
|
"base_model:quantized:nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "24GB+ VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 21,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 30,
|
|
"parameters_active_b": 30,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "falcon-h1-tiny-90m-instruct",
|
|
"name": "Falcon H1 Tiny 90M Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/tiiuae/Falcon-H1-Tiny-90M-Instruct",
|
|
"description": "Open source model tiiuae/Falcon-H1-Tiny-90M-Instruct. 31 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 31,
|
|
"language": "Python",
|
|
"license": "other",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"falcon_h1",
|
|
"falcon-h1",
|
|
"edge",
|
|
"conversational",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "hermes-3-llama-3.2-3b",
|
|
"name": "Hermes 3 Llama 3.2 3B",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/NousResearch/Hermes-3-Llama-3.2-3B",
|
|
"description": "Open source model NousResearch/Hermes-3-Llama-3.2-3B. 174 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 174,
|
|
"language": "Python",
|
|
"license": "llama3",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llama",
|
|
"Llama-3",
|
|
"instruct",
|
|
"finetune",
|
|
"chatml",
|
|
"gpt4",
|
|
"synthetic data",
|
|
"distillation",
|
|
"function calling",
|
|
"json mode",
|
|
"axolotl",
|
|
"roleplaying",
|
|
"chat",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2408.11857",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"deploy:azure",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 2,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 3,
|
|
"parameters_active_b": 3,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "meta-llama-3.1-8b-instruct",
|
|
"name": "Meta Llama 3.1 8B Instruct",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/unsloth/Meta-Llama-3.1-8B-Instruct",
|
|
"description": "Open source model unsloth/Meta-Llama-3.1-8B-Instruct. 94 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 94,
|
|
"language": "Python",
|
|
"license": "llama3.1",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llama",
|
|
"llama-3",
|
|
"meta",
|
|
"facebook",
|
|
"unsloth",
|
|
"conversational",
|
|
"en",
|
|
"base_model:meta-llama/Llama-3.1-8B-Instruct",
|
|
"base_model:finetune:meta-llama/Llama-3.1-8B-Instruct",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 6,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 8,
|
|
"parameters_active_b": 8,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "meta-llama-3.1-8b-instruct-gguf",
|
|
"name": "Meta Llama 3.1 8B Instruct Gguf",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
|
|
"description": "Open source model bartowski/Meta-Llama-3.1-8B-Instruct-GGUF. 321 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 321,
|
|
"language": "Python",
|
|
"license": "llama3.1",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"gguf",
|
|
"facebook",
|
|
"meta",
|
|
"pytorch",
|
|
"llama",
|
|
"llama-3",
|
|
"en",
|
|
"de",
|
|
"fr",
|
|
"it",
|
|
"pt",
|
|
"hi",
|
|
"es",
|
|
"th",
|
|
"base_model:meta-llama/Llama-3.1-8B-Instruct",
|
|
"base_model:quantized:meta-llama/Llama-3.1-8B-Instruct",
|
|
"endpoints_compatible",
|
|
"region:us",
|
|
"conversational"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 6,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 8,
|
|
"parameters_active_b": 8,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "deepseek-v3-0324",
|
|
"name": "Deepseek V3 0324",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/deepseek-ai/DeepSeek-V3-0324",
|
|
"description": "Open source model deepseek-ai/DeepSeek-V3-0324. 3087 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 3087,
|
|
"language": "Python",
|
|
"license": "mit",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"deepseek_v3",
|
|
"conversational",
|
|
"custom_code",
|
|
"arxiv:2412.19437",
|
|
"eval-results",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"fp8",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "elm",
|
|
"name": "Elm",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/Joaoffg/ELM",
|
|
"description": "Open source model Joaoffg/ELM. 2 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 2,
|
|
"language": "Python",
|
|
"license": "llama2",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llama",
|
|
"academic",
|
|
"university",
|
|
"en",
|
|
"nl",
|
|
"arxiv:2408.06931",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "llama-2-13b-chat-hf",
|
|
"name": "Llama 2 13B Chat Hf",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/meta-llama/Llama-2-13b-chat-hf",
|
|
"description": "Open source model meta-llama/Llama-2-13b-chat-hf. 1109 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 1109,
|
|
"language": "Python",
|
|
"license": "llama2",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"pytorch",
|
|
"safetensors",
|
|
"llama",
|
|
"facebook",
|
|
"meta",
|
|
"llama-2",
|
|
"conversational",
|
|
"en",
|
|
"arxiv:2307.09288",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "16GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 9,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 13,
|
|
"parameters_active_b": 13,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
},
|
|
{
|
|
"slug": "svara-tts-v1",
|
|
"name": "Svara Tts V1",
|
|
"category": "AI Models",
|
|
"is_open_source": true,
|
|
"website": "https://huggingface.co/kenpath/svara-tts-v1",
|
|
"description": "Open source model kenpath/svara-tts-v1. 18 likes on Hugging Face.",
|
|
"pros": [
|
|
"Open Source",
|
|
"Running Locally"
|
|
],
|
|
"cons": [
|
|
"Requires GPU"
|
|
],
|
|
"stars": 18,
|
|
"language": "Python",
|
|
"license": "apache-2.0",
|
|
"tags": [
|
|
"AI",
|
|
"LLM",
|
|
"transformers",
|
|
"safetensors",
|
|
"llama",
|
|
"text-to-speech",
|
|
"speech-synthesis",
|
|
"multilingual",
|
|
"indic",
|
|
"orpheus",
|
|
"lora",
|
|
"low-latency",
|
|
"gguf",
|
|
"zero-shot",
|
|
"emotions",
|
|
"discrete-audio-tokens",
|
|
"hi",
|
|
"bn",
|
|
"mr",
|
|
"te",
|
|
"kn",
|
|
"bho",
|
|
"mag",
|
|
"hne",
|
|
"mai",
|
|
"as",
|
|
"brx",
|
|
"doi",
|
|
"gu",
|
|
"ml",
|
|
"pa",
|
|
"ta",
|
|
"ne",
|
|
"sa",
|
|
"en",
|
|
"dataset:SYSPIN",
|
|
"dataset:RASA",
|
|
"dataset:IndicTTS",
|
|
"dataset:SPICOR",
|
|
"base_model:canopylabs/3b-hi-ft-research_release",
|
|
"base_model:adapter:canopylabs/3b-hi-ft-research_release",
|
|
"text-generation-inference",
|
|
"endpoints_compatible",
|
|
"region:us"
|
|
],
|
|
"hardware_req": "8GB VRAM",
|
|
"hosting_type": "self-hosted",
|
|
"ai_metadata": {
|
|
"vram_inference_gb": 1,
|
|
"context_window_tokens": 4096,
|
|
"parameters_total_b": 0,
|
|
"parameters_active_b": 0,
|
|
"is_multimodal": false
|
|
},
|
|
"referral_url": ""
|
|
}
|
|
] |