Files
altstack-data/data/tools_expanded.json
2026-02-25 22:36:27 +05:30

9193 lines
220 KiB
JSON

[
{
"slug": "qwen2.5-7b-instruct",
"name": "Qwen2.5 7B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct",
"description": "Open source model Qwen/Qwen2.5-7B-Instruct. 1073 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1073,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"chat",
"conversational",
"en",
"arxiv:2309.00071",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-7B",
"base_model:finetune:Qwen/Qwen2.5-7B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-0.6b",
"name": "Qwen3 0.6B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-0.6B",
"description": "Open source model Qwen/Qwen3-0.6B. 1083 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1083,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-0.6B-Base",
"base_model:finetune:Qwen/Qwen3-0.6B-Base",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 6,
"parameters_active_b": 6,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "gpt2",
"name": "Gpt2",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/openai-community/gpt2",
"description": "Open source model openai-community/gpt2. 3114 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 3114,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"tf",
"jax",
"tflite",
"rust",
"onnx",
"safetensors",
"gpt2",
"exbert",
"en",
"doi:10.57967/hf/0039",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-1.5b-instruct",
"name": "Qwen2.5 1.5B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct",
"description": "Open source model Qwen/Qwen2.5-1.5B-Instruct. 617 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 617,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"chat",
"conversational",
"en",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-1.5B",
"base_model:finetune:Qwen/Qwen2.5-1.5B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 5,
"parameters_active_b": 5,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-3b-instruct",
"name": "Qwen2.5 3B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-3B-Instruct",
"description": "Open source model Qwen/Qwen2.5-3B-Instruct. 404 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 404,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"chat",
"conversational",
"en",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-3B",
"base_model:finetune:Qwen/Qwen2.5-3B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 2,
"context_window_tokens": 4096,
"parameters_total_b": 3,
"parameters_active_b": 3,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-3.1-8b-instruct",
"name": "Llama 3.1 8B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct",
"description": "Open source model meta-llama/Llama-3.1-8B-Instruct. 5467 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 5467,
"language": "Python",
"license": "llama3.1",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"facebook",
"meta",
"pytorch",
"llama-3",
"conversational",
"en",
"de",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"arxiv:2204.05149",
"base_model:meta-llama/Llama-3.1-8B",
"base_model:finetune:meta-llama/Llama-3.1-8B",
"eval-results",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "gpt-oss-20b",
"name": "Gpt Oss 20B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/openai/gpt-oss-20b",
"description": "Open source model openai/gpt-oss-20b. 4378 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 4378,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"gpt_oss",
"vllm",
"conversational",
"arxiv:2508.10925",
"endpoints_compatible",
"8-bit",
"mxfp4",
"deploy:azure",
"region:us"
],
"hardware_req": "16GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 14,
"context_window_tokens": 4096,
"parameters_total_b": 20,
"parameters_active_b": 20,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-0.5b-instruct",
"name": "Qwen2.5 0.5B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct",
"description": "Open source model Qwen/Qwen2.5-0.5B-Instruct. 463 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 463,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"chat",
"conversational",
"en",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-0.5B",
"base_model:finetune:Qwen/Qwen2.5-0.5B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 5,
"parameters_active_b": 5,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-4b",
"name": "Qwen3 4B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-4B",
"description": "Open source model Qwen/Qwen3-4B. 552 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 552,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2309.00071",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-4B-Base",
"base_model:finetune:Qwen/Qwen3-4B-Base",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 3,
"context_window_tokens": 4096,
"parameters_total_b": 4,
"parameters_active_b": 4,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-8b",
"name": "Qwen3 8B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-8B",
"description": "Open source model Qwen/Qwen3-8B. 940 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 940,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2309.00071",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-8B-Base",
"base_model:finetune:Qwen/Qwen3-8B-Base",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-32b-instruct",
"name": "Qwen2.5 32B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-32B-Instruct",
"description": "Open source model Qwen/Qwen2.5-32B-Instruct. 328 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 328,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"chat",
"conversational",
"en",
"arxiv:2309.00071",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-32B",
"base_model:finetune:Qwen/Qwen2.5-32B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 22,
"context_window_tokens": 4096,
"parameters_total_b": 32,
"parameters_active_b": 32,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "opt-125m",
"name": "Opt 125M",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/facebook/opt-125m",
"description": "Open source model facebook/opt-125m. 233 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 233,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"tf",
"jax",
"opt",
"en",
"arxiv:2205.01068",
"arxiv:2005.14165",
"text-generation-inference",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-1.7b",
"name": "Qwen3 1.7B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-1.7B",
"description": "Open source model Qwen/Qwen3-1.7B. 422 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 422,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-1.7B-Base",
"base_model:finetune:Qwen/Qwen3-1.7B-Base",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "tiny-qwen2forcausallm-2.5",
"name": "Tiny Qwen2Forcausallm 2.5",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
"description": "Open source model trl-internal-testing/tiny-Qwen2ForCausalLM-2.5. 3 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 3,
"language": "Python",
"license": "unknown",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"trl",
"conversational",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "dolphin-2.9.1-yi-1.5-34b",
"name": "Dolphin 2.9.1 Yi 1.5 34B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/dphn/dolphin-2.9.1-yi-1.5-34b",
"description": "Open source model dphn/dolphin-2.9.1-yi-1.5-34b. 54 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 54,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"generated_from_trainer",
"axolotl",
"conversational",
"dataset:cognitivecomputations/Dolphin-2.9",
"dataset:teknium/OpenHermes-2.5",
"dataset:m-a-p/CodeFeedback-Filtered-Instruction",
"dataset:cognitivecomputations/dolphin-coder",
"dataset:cognitivecomputations/samantha-data",
"dataset:microsoft/orca-math-word-problems-200k",
"dataset:Locutusque/function-calling-chatml",
"dataset:internlm/Agent-FLAN",
"base_model:01-ai/Yi-1.5-34B",
"base_model:finetune:01-ai/Yi-1.5-34B",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 24,
"context_window_tokens": 4096,
"parameters_total_b": 34,
"parameters_active_b": 34,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-embedding-0.6b",
"name": "Qwen3 Embedding 0.6B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-Embedding-0.6B",
"description": "Open source model Qwen/Qwen3-Embedding-0.6B. 879 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 879,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"sentence-transformers",
"safetensors",
"qwen3",
"transformers",
"sentence-similarity",
"feature-extraction",
"text-embeddings-inference",
"arxiv:2506.05176",
"base_model:Qwen/Qwen3-0.6B-Base",
"base_model:finetune:Qwen/Qwen3-0.6B-Base",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 6,
"parameters_active_b": 6,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "gpt-oss-120b",
"name": "Gpt Oss 120B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/openai/gpt-oss-120b",
"description": "Open source model openai/gpt-oss-120b. 4503 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 4503,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"gpt_oss",
"vllm",
"conversational",
"arxiv:2508.10925",
"endpoints_compatible",
"8-bit",
"mxfp4",
"deploy:azure",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 84,
"context_window_tokens": 4096,
"parameters_total_b": 120,
"parameters_active_b": 120,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-4b-instruct-2507",
"name": "Qwen3 4B Instruct 2507",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507",
"description": "Open source model Qwen/Qwen3-4B-Instruct-2507. 730 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 730,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2505.09388",
"eval-results",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 3,
"context_window_tokens": 4096,
"parameters_total_b": 4,
"parameters_active_b": 4,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "moondream2",
"name": "Moondream2",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/vikhyatk/moondream2",
"description": "Open source model vikhyatk/moondream2. 1373 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1373,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"moondream1",
"image-text-to-text",
"custom_code",
"doi:10.57967/hf/6762",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-3.2-1b-instruct",
"name": "Llama 3.2 1B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct",
"description": "Open source model meta-llama/Llama-3.2-1B-Instruct. 1292 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1292,
"language": "Python",
"license": "llama3.2",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"facebook",
"meta",
"pytorch",
"llama-3",
"conversational",
"en",
"de",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"arxiv:2204.05149",
"arxiv:2405.16406",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 1,
"parameters_active_b": 1,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2-1.5b-instruct",
"name": "Qwen2 1.5B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2-1.5B-Instruct",
"description": "Open source model Qwen/Qwen2-1.5B-Instruct. 158 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 158,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"chat",
"conversational",
"en",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 5,
"parameters_active_b": 5,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-coder-0.5b-instruct",
"name": "Qwen2.5 Coder 0.5B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B-Instruct",
"description": "Open source model Qwen/Qwen2.5-Coder-0.5B-Instruct. 64 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 64,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"code",
"codeqwen",
"chat",
"qwen",
"qwen-coder",
"conversational",
"en",
"arxiv:2409.12186",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-Coder-0.5B",
"base_model:finetune:Qwen/Qwen2.5-Coder-0.5B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 5,
"parameters_active_b": 5,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "kimi-k2.5",
"name": "Kimi K2.5",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/mlx-community/Kimi-K2.5",
"description": "Open source model mlx-community/Kimi-K2.5. 28 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 28,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"mlx",
"safetensors",
"kimi_k25",
"conversational",
"custom_code",
"base_model:moonshotai/Kimi-K2.5",
"base_model:quantized:moonshotai/Kimi-K2.5",
"4-bit",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "mistral-7b-instruct-v0.2",
"name": "Mistral 7B Instruct V0.2",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2",
"description": "Open source model mistralai/Mistral-7B-Instruct-v0.2. 3075 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 3075,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"safetensors",
"mistral",
"finetuned",
"mistral-common",
"conversational",
"arxiv:2310.06825",
"text-generation-inference",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-30b-a3b-instruct-2507",
"name": "Qwen3 30B A3B Instruct 2507",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-30B-A3B-Instruct-2507",
"description": "Open source model Qwen/Qwen3-30B-A3B-Instruct-2507. 766 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 766,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3_moe",
"conversational",
"arxiv:2402.17463",
"arxiv:2407.02490",
"arxiv:2501.15383",
"arxiv:2404.06654",
"arxiv:2505.09388",
"eval-results",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 21,
"context_window_tokens": 4096,
"parameters_total_b": 30,
"parameters_active_b": 30,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llm-jp-3-3.7b-instruct",
"name": "Llm Jp 3 3.7B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/llm-jp/llm-jp-3-3.7b-instruct",
"description": "Open source model llm-jp/llm-jp-3-3.7b-instruct. 13 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 13,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"conversational",
"en",
"ja",
"text-generation-inference",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-3.2-3b-instruct",
"name": "Llama 3.2 3B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct",
"description": "Open source model meta-llama/Llama-3.2-3B-Instruct. 1986 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1986,
"language": "Python",
"license": "llama3.2",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"facebook",
"meta",
"pytorch",
"llama-3",
"conversational",
"en",
"de",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"arxiv:2204.05149",
"arxiv:2405.16406",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 2,
"context_window_tokens": 4096,
"parameters_total_b": 3,
"parameters_active_b": 3,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "distilgpt2",
"name": "Distilgpt2",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/distilbert/distilgpt2",
"description": "Open source model distilbert/distilgpt2. 609 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 609,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"tf",
"jax",
"tflite",
"rust",
"coreml",
"safetensors",
"gpt2",
"exbert",
"en",
"dataset:openwebtext",
"arxiv:1910.01108",
"arxiv:2201.08542",
"arxiv:2203.12574",
"arxiv:1910.09700",
"arxiv:1503.02531",
"model-index",
"co2_eq_emissions",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-embedding-8b",
"name": "Qwen3 Embedding 8B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-Embedding-8B",
"description": "Open source model Qwen/Qwen3-Embedding-8B. 584 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 584,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"sentence-transformers",
"safetensors",
"qwen3",
"transformers",
"sentence-similarity",
"feature-extraction",
"text-embeddings-inference",
"arxiv:2506.05176",
"base_model:Qwen/Qwen3-8B-Base",
"base_model:finetune:Qwen/Qwen3-8B-Base",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "meta-llama-3-8b",
"name": "Meta Llama 3 8B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/meta-llama/Meta-Llama-3-8B",
"description": "Open source model meta-llama/Meta-Llama-3-8B. 6458 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 6458,
"language": "Python",
"license": "llama3",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"facebook",
"meta",
"pytorch",
"llama-3",
"en",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "tinyllama-1.1b-chat-v1.0",
"name": "Tinyllama 1.1B Chat V1.0",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0",
"description": "Open source model TinyLlama/TinyLlama-1.1B-Chat-v1.0. 1526 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1526,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"conversational",
"en",
"dataset:cerebras/SlimPajama-627B",
"dataset:bigcode/starcoderdata",
"dataset:HuggingFaceH4/ultrachat_200k",
"dataset:HuggingFaceH4/ultrafeedback_binarized",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 1,
"parameters_active_b": 1,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "glm-4.7-flash",
"name": "Glm 4.7 Flash",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/zai-org/GLM-4.7-Flash",
"description": "Open source model zai-org/GLM-4.7-Flash. 1538 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1538,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"glm4_moe_lite",
"conversational",
"en",
"zh",
"arxiv:2508.06471",
"eval-results",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-3.2-1b",
"name": "Llama 3.2 1B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/meta-llama/Llama-3.2-1B",
"description": "Open source model meta-llama/Llama-3.2-1B. 2295 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 2295,
"language": "Python",
"license": "llama3.2",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"facebook",
"meta",
"pytorch",
"llama-3",
"en",
"de",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"arxiv:2204.05149",
"arxiv:2405.16406",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 1,
"parameters_active_b": 1,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-32b",
"name": "Qwen3 32B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-32B",
"description": "Open source model Qwen/Qwen3-32B. 656 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 656,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2309.00071",
"arxiv:2505.09388",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 22,
"context_window_tokens": 4096,
"parameters_total_b": 32,
"parameters_active_b": 32,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-3.2-1b-instruct-fp8-dynamic",
"name": "Llama 3.2 1B Instruct Fp8 Dynamic",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/RedHatAI/Llama-3.2-1B-Instruct-FP8-dynamic",
"description": "Open source model RedHatAI/Llama-3.2-1B-Instruct-FP8-dynamic. 3 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 3,
"language": "Python",
"license": "llama3.2",
"tags": [
"AI",
"LLM",
"safetensors",
"llama",
"fp8",
"vllm",
"conversational",
"en",
"de",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"base_model:meta-llama/Llama-3.2-1B-Instruct",
"base_model:quantized:meta-llama/Llama-3.2-1B-Instruct",
"compressed-tensors",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 1,
"parameters_active_b": 1,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-coder-1.5b-instruct",
"name": "Qwen2.5 Coder 1.5B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct",
"description": "Open source model Qwen/Qwen2.5-Coder-1.5B-Instruct. 106 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 106,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"code",
"codeqwen",
"chat",
"qwen",
"qwen-coder",
"conversational",
"en",
"arxiv:2409.12186",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-Coder-1.5B",
"base_model:finetune:Qwen/Qwen2.5-Coder-1.5B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 5,
"parameters_active_b": 5,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "meta-llama-3-8b-instruct",
"name": "Meta Llama 3 8B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct",
"description": "Open source model meta-llama/Meta-Llama-3-8B-Instruct. 4380 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 4380,
"language": "Python",
"license": "llama3",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"facebook",
"meta",
"pytorch",
"llama-3",
"conversational",
"en",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "gemma-3-1b-it",
"name": "Gemma 3 1B It",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/google/gemma-3-1b-it",
"description": "Open source model google/gemma-3-1b-it. 842 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 842,
"language": "Python",
"license": "gemma",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"gemma3_text",
"conversational",
"arxiv:1905.07830",
"arxiv:1905.10044",
"arxiv:1911.11641",
"arxiv:1904.09728",
"arxiv:1705.03551",
"arxiv:1911.01547",
"arxiv:1907.10641",
"arxiv:1903.00161",
"arxiv:2009.03300",
"arxiv:2304.06364",
"arxiv:2103.03874",
"arxiv:2110.14168",
"arxiv:2311.12022",
"arxiv:2108.07732",
"arxiv:2107.03374",
"arxiv:2210.03057",
"arxiv:2106.03193",
"arxiv:1910.11856",
"arxiv:2502.12404",
"arxiv:2502.21228",
"arxiv:2404.16816",
"arxiv:2104.12756",
"arxiv:2311.16502",
"arxiv:2203.10244",
"arxiv:2404.12390",
"arxiv:1810.12440",
"arxiv:1908.02660",
"arxiv:2312.11805",
"base_model:google/gemma-3-1b-pt",
"base_model:finetune:google/gemma-3-1b-pt",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 1,
"parameters_active_b": 1,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "phi-2",
"name": "Phi 2",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/microsoft/phi-2",
"description": "Open source model microsoft/phi-2. 3425 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 3425,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"phi",
"nlp",
"code",
"en",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-coder-7b-instruct",
"name": "Qwen2.5 Coder 7B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct",
"description": "Open source model Qwen/Qwen2.5-Coder-7B-Instruct. 646 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 646,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"code",
"codeqwen",
"chat",
"qwen",
"qwen-coder",
"conversational",
"en",
"arxiv:2409.12186",
"arxiv:2309.00071",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-Coder-7B",
"base_model:finetune:Qwen/Qwen2.5-Coder-7B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-7b",
"name": "Qwen2.5 7B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-7B",
"description": "Open source model Qwen/Qwen2.5-7B. 264 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 264,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"conversational",
"en",
"arxiv:2407.10671",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "deepseek-r1-distill-qwen-1.5b",
"name": "Deepseek R1 Distill Qwen 1.5B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
"description": "Open source model deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B. 1446 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1446,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"conversational",
"arxiv:2501.12948",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 5,
"parameters_active_b": 5,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "deepseek-v3",
"name": "Deepseek V3",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/deepseek-ai/DeepSeek-V3",
"description": "Open source model deepseek-ai/DeepSeek-V3. 4024 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 4024,
"language": "Python",
"license": "unknown",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"deepseek_v3",
"conversational",
"custom_code",
"arxiv:2412.19437",
"eval-results",
"text-generation-inference",
"endpoints_compatible",
"fp8",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "gpt2-large",
"name": "Gpt2 Large",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/openai-community/gpt2-large",
"description": "Open source model openai-community/gpt2-large. 344 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 344,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"tf",
"jax",
"rust",
"onnx",
"safetensors",
"gpt2",
"en",
"arxiv:1910.09700",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "glm-4.7-flash-mlx-8bit",
"name": "Glm 4.7 Flash Mlx 8Bit",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/lmstudio-community/GLM-4.7-Flash-MLX-8bit",
"description": "Open source model lmstudio-community/GLM-4.7-Flash-MLX-8bit. 9 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 9,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"glm4_moe_lite",
"mlx",
"conversational",
"en",
"zh",
"base_model:zai-org/GLM-4.7-Flash",
"base_model:quantized:zai-org/GLM-4.7-Flash",
"endpoints_compatible",
"8-bit",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "glm-4.7-flash-mlx-6bit",
"name": "Glm 4.7 Flash Mlx 6Bit",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/lmstudio-community/GLM-4.7-Flash-MLX-6bit",
"description": "Open source model lmstudio-community/GLM-4.7-Flash-MLX-6bit. 7 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 7,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"glm4_moe_lite",
"mlx",
"conversational",
"en",
"zh",
"base_model:zai-org/GLM-4.7-Flash",
"base_model:quantized:zai-org/GLM-4.7-Flash",
"endpoints_compatible",
"6-bit",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 6,
"parameters_active_b": 6,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-0.6b-fp8",
"name": "Qwen3 0.6B Fp8",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-0.6B-FP8",
"description": "Open source model Qwen/Qwen3-0.6B-FP8. 56 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 56,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-0.6B",
"base_model:quantized:Qwen/Qwen3-0.6B",
"text-generation-inference",
"endpoints_compatible",
"fp8",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 6,
"parameters_active_b": 6,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-3.1-8b",
"name": "Llama 3.1 8B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/meta-llama/Llama-3.1-8B",
"description": "Open source model meta-llama/Llama-3.1-8B. 2065 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 2065,
"language": "Python",
"license": "llama3.1",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"facebook",
"meta",
"pytorch",
"llama-3",
"en",
"de",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"arxiv:2204.05149",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "pythia-160m",
"name": "Pythia 160M",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/EleutherAI/pythia-160m",
"description": "Open source model EleutherAI/pythia-160m. 38 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 38,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"safetensors",
"gpt_neox",
"causal-lm",
"pythia",
"en",
"dataset:EleutherAI/pile",
"arxiv:2304.01373",
"arxiv:2101.00027",
"arxiv:2201.07311",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "deepseek-r1-distill-qwen-32b",
"name": "Deepseek R1 Distill Qwen 32B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
"description": "Open source model deepseek-ai/DeepSeek-R1-Distill-Qwen-32B. 1517 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1517,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"conversational",
"arxiv:2501.12948",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 22,
"context_window_tokens": 4096,
"parameters_total_b": 32,
"parameters_active_b": 32,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "hunyuanocr",
"name": "Hunyuanocr",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/tencent/HunyuanOCR",
"description": "Open source model tencent/HunyuanOCR. 553 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 553,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"hunyuan_vl",
"ocr",
"hunyuan",
"vision-language",
"image-to-text",
"1B",
"end-to-end",
"image-text-to-text",
"conversational",
"multilingual",
"arxiv:2511.19575",
"base_model:tencent/HunyuanOCR",
"base_model:finetune:tencent/HunyuanOCR",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-30b-a3b",
"name": "Qwen3 30B A3B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-30B-A3B",
"description": "Open source model Qwen/Qwen3-30B-A3B. 855 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 855,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3_moe",
"conversational",
"arxiv:2309.00071",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-30B-A3B-Base",
"base_model:finetune:Qwen/Qwen3-30B-A3B-Base",
"endpoints_compatible",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 21,
"context_window_tokens": 4096,
"parameters_total_b": 30,
"parameters_active_b": 30,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-0.5b",
"name": "Qwen2.5 0.5B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-0.5B",
"description": "Open source model Qwen/Qwen2.5-0.5B. 372 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 372,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"conversational",
"en",
"arxiv:2407.10671",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 5,
"parameters_active_b": 5,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-32b-instruct-awq",
"name": "Qwen2.5 32B Instruct Awq",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-32B-Instruct-AWQ",
"description": "Open source model Qwen/Qwen2.5-32B-Instruct-AWQ. 94 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 94,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"chat",
"conversational",
"en",
"arxiv:2309.00071",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-32B-Instruct",
"base_model:quantized:Qwen/Qwen2.5-32B-Instruct",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"awq",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 22,
"context_window_tokens": 4096,
"parameters_total_b": 32,
"parameters_active_b": 32,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "nvidia-nemotron-3-nano-30b-a3b-fp8",
"name": "Nvidia Nemotron 3 Nano 30B A3B Fp8",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8",
"description": "Open source model nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8. 284 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 284,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"nemotron_h",
"feature-extraction",
"nvidia",
"pytorch",
"conversational",
"custom_code",
"en",
"es",
"fr",
"de",
"ja",
"it",
"dataset:nvidia/Nemotron-Pretraining-Code-v1",
"dataset:nvidia/Nemotron-CC-v2",
"dataset:nvidia/Nemotron-Pretraining-SFT-v1",
"dataset:nvidia/Nemotron-CC-Math-v1",
"dataset:nvidia/Nemotron-Pretraining-Code-v2",
"dataset:nvidia/Nemotron-Pretraining-Specialized-v1",
"dataset:nvidia/Nemotron-CC-v2.1",
"dataset:nvidia/Nemotron-CC-Code-v1",
"dataset:nvidia/Nemotron-Pretraining-Dataset-sample",
"dataset:nvidia/Nemotron-Competitive-Programming-v1",
"dataset:nvidia/Nemotron-Math-v2",
"dataset:nvidia/Nemotron-Agentic-v1",
"dataset:nvidia/Nemotron-Math-Proofs-v1",
"dataset:nvidia/Nemotron-Instruction-Following-Chat-v1",
"dataset:nvidia/Nemotron-Science-v1",
"dataset:nvidia/Nemotron-3-Nano-RL-Training-Blend",
"arxiv:2512.20848",
"arxiv:2512.20856",
"base_model:nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
"base_model:quantized:nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
"eval-results",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 21,
"context_window_tokens": 4096,
"parameters_total_b": 30,
"parameters_active_b": 30,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-14b-instruct",
"name": "Qwen2.5 14B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-14B-Instruct",
"description": "Open source model Qwen/Qwen2.5-14B-Instruct. 312 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 312,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"chat",
"conversational",
"en",
"arxiv:2309.00071",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-14B",
"base_model:finetune:Qwen/Qwen2.5-14B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "16GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 10,
"context_window_tokens": 4096,
"parameters_total_b": 14,
"parameters_active_b": 14,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "nvidia-nemotron-3-nano-30b-a3b-bf16",
"name": "Nvidia Nemotron 3 Nano 30B A3B Bf16",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
"description": "Open source model nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16. 634 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 634,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"nemotron_h",
"feature-extraction",
"nvidia",
"pytorch",
"conversational",
"custom_code",
"en",
"es",
"fr",
"de",
"ja",
"it",
"dataset:nvidia/Nemotron-Pretraining-Code-v1",
"dataset:nvidia/Nemotron-CC-v2",
"dataset:nvidia/Nemotron-Pretraining-SFT-v1",
"dataset:nvidia/Nemotron-CC-Math-v1",
"dataset:nvidia/Nemotron-Pretraining-Code-v2",
"dataset:nvidia/Nemotron-Pretraining-Specialized-v1",
"dataset:nvidia/Nemotron-CC-v2.1",
"dataset:nvidia/Nemotron-CC-Code-v1",
"dataset:nvidia/Nemotron-Pretraining-Dataset-sample",
"dataset:nvidia/Nemotron-Competitive-Programming-v1",
"dataset:nvidia/Nemotron-Math-v2",
"dataset:nvidia/Nemotron-Agentic-v1",
"dataset:nvidia/Nemotron-Math-Proofs-v1",
"dataset:nvidia/Nemotron-Instruction-Following-Chat-v1",
"dataset:nvidia/Nemotron-Science-v1",
"dataset:nvidia/Nemotron-3-Nano-RL-Training-Blend",
"arxiv:2512.20848",
"arxiv:2512.20856",
"eval-results",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 21,
"context_window_tokens": 4096,
"parameters_total_b": 30,
"parameters_active_b": 30,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "openelm-1_1b-instruct",
"name": "Openelm 1_1B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/apple/OpenELM-1_1B-Instruct",
"description": "Open source model apple/OpenELM-1_1B-Instruct. 72 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 72,
"language": "Python",
"license": "apple-amlr",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"openelm",
"custom_code",
"arxiv:2404.14619",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 1,
"parameters_active_b": 1,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "tiny-random-llamaforcausallm",
"name": "Tiny Random Llamaforcausallm",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/hmellor/tiny-random-LlamaForCausalLM",
"description": "Open source model hmellor/tiny-random-LlamaForCausalLM. 0 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 0,
"language": "Python",
"license": "unknown",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"conversational",
"arxiv:1910.09700",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-next-80b-a3b-instruct",
"name": "Qwen3 Next 80B A3B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-Next-80B-A3B-Instruct",
"description": "Open source model Qwen/Qwen3-Next-80B-A3B-Instruct. 937 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 937,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3_next",
"conversational",
"arxiv:2309.00071",
"arxiv:2404.06654",
"arxiv:2505.09388",
"arxiv:2501.15383",
"eval-results",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 56,
"context_window_tokens": 4096,
"parameters_total_b": 80,
"parameters_active_b": 80,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "h2ovl-mississippi-800m",
"name": "H2Ovl Mississippi 800M",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/h2oai/h2ovl-mississippi-800m",
"description": "Open source model h2oai/h2ovl-mississippi-800m. 39 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 39,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"h2ovl_chat",
"feature-extraction",
"gpt",
"llm",
"multimodal large language model",
"ocr",
"conversational",
"custom_code",
"en",
"arxiv:2410.13611",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "bloomz-560m",
"name": "Bloomz 560M",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/bigscience/bloomz-560m",
"description": "Open source model bigscience/bloomz-560m. 137 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 137,
"language": "Python",
"license": "bigscience-bloom-rail-1.0",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"tensorboard",
"safetensors",
"bloom",
"ak",
"ar",
"as",
"bm",
"bn",
"ca",
"code",
"en",
"es",
"eu",
"fon",
"fr",
"gu",
"hi",
"id",
"ig",
"ki",
"kn",
"lg",
"ln",
"ml",
"mr",
"ne",
"nso",
"ny",
"or",
"pa",
"pt",
"rn",
"rw",
"sn",
"st",
"sw",
"ta",
"te",
"tn",
"ts",
"tum",
"tw",
"ur",
"vi",
"wo",
"xh",
"yo",
"zh",
"zu",
"dataset:bigscience/xP3",
"arxiv:2211.01786",
"model-index",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-1.5b-quantized.w8a8",
"name": "Qwen2.5 1.5B Quantized.W8A8",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/RedHatAI/Qwen2.5-1.5B-quantized.w8a8",
"description": "Open source model RedHatAI/Qwen2.5-1.5B-quantized.w8a8. 2 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 2,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"safetensors",
"qwen2",
"chat",
"neuralmagic",
"llmcompressor",
"conversational",
"en",
"base_model:Qwen/Qwen2.5-1.5B",
"base_model:quantized:Qwen/Qwen2.5-1.5B",
"8-bit",
"compressed-tensors",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 5,
"parameters_active_b": 5,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "h2ovl-mississippi-2b",
"name": "H2Ovl Mississippi 2B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/h2oai/h2ovl-mississippi-2b",
"description": "Open source model h2oai/h2ovl-mississippi-2b. 40 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 40,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"h2ovl_chat",
"feature-extraction",
"gpt",
"llm",
"multimodal large language model",
"ocr",
"conversational",
"custom_code",
"en",
"arxiv:2410.13611",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 2,
"parameters_active_b": 2,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llava-v1.5-7b",
"name": "Llava V1.5 7B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/liuhaotian/llava-v1.5-7b",
"description": "Open source model liuhaotian/llava-v1.5-7b. 537 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 537,
"language": "Python",
"license": "unknown",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"llava",
"image-text-to-text",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "t5-3b",
"name": "T5 3B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/google-t5/t5-3b",
"description": "Open source model google-t5/t5-3b. 51 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 51,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"tf",
"safetensors",
"t5",
"summarization",
"translation",
"en",
"fr",
"ro",
"de",
"multilingual",
"dataset:c4",
"arxiv:1805.12471",
"arxiv:1708.00055",
"arxiv:1704.05426",
"arxiv:1606.05250",
"arxiv:1808.09121",
"arxiv:1810.12885",
"arxiv:1905.10044",
"arxiv:1910.09700",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 2,
"context_window_tokens": 4096,
"parameters_total_b": 3,
"parameters_active_b": 3,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-14b-instruct-awq",
"name": "Qwen2.5 14B Instruct Awq",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-AWQ",
"description": "Open source model Qwen/Qwen2.5-14B-Instruct-AWQ. 27 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 27,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"chat",
"conversational",
"en",
"arxiv:2309.00071",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-14B-Instruct",
"base_model:quantized:Qwen/Qwen2.5-14B-Instruct",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"awq",
"region:us"
],
"hardware_req": "16GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 10,
"context_window_tokens": 4096,
"parameters_total_b": 14,
"parameters_active_b": 14,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-3.2-3b",
"name": "Llama 3.2 3B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/meta-llama/Llama-3.2-3B",
"description": "Open source model meta-llama/Llama-3.2-3B. 697 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 697,
"language": "Python",
"license": "llama3.2",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"facebook",
"meta",
"pytorch",
"llama-3",
"en",
"de",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"arxiv:2204.05149",
"arxiv:2405.16406",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 2,
"context_window_tokens": 4096,
"parameters_total_b": 3,
"parameters_active_b": 3,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "phi-3-mini-4k-instruct-gptq-4bit",
"name": "Phi 3 Mini 4K Instruct Gptq 4Bit",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/kaitchup/Phi-3-mini-4k-instruct-gptq-4bit",
"description": "Open source model kaitchup/Phi-3-mini-4k-instruct-gptq-4bit. 2 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 2,
"language": "Python",
"license": "unknown",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"phi3",
"conversational",
"custom_code",
"arxiv:1910.09700",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"gptq",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 3,
"context_window_tokens": 4096,
"parameters_total_b": 4,
"parameters_active_b": 4,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-72b-instruct-awq",
"name": "Qwen2.5 72B Instruct Awq",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct-AWQ",
"description": "Open source model Qwen/Qwen2.5-72B-Instruct-AWQ. 74 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 74,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"chat",
"conversational",
"en",
"arxiv:2309.00071",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-72B-Instruct",
"base_model:quantized:Qwen/Qwen2.5-72B-Instruct",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"awq",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 50,
"context_window_tokens": 4096,
"parameters_total_b": 72,
"parameters_active_b": 72,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "smollm2-135m",
"name": "Smollm2 135M",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/HuggingFaceTB/SmolLM2-135M",
"description": "Open source model HuggingFaceTB/SmolLM2-135M. 166 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 166,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"en",
"arxiv:2502.02737",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-3.3-70b-instruct",
"name": "Llama 3.3 70B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct",
"description": "Open source model meta-llama/Llama-3.3-70B-Instruct. 2658 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 2658,
"language": "Python",
"license": "llama3.3",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"facebook",
"meta",
"pytorch",
"llama-3",
"conversational",
"en",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"de",
"arxiv:2204.05149",
"base_model:meta-llama/Llama-3.1-70B",
"base_model:finetune:meta-llama/Llama-3.1-70B",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 49,
"context_window_tokens": 4096,
"parameters_total_b": 70,
"parameters_active_b": 70,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-30b-a3b-instruct-2507-fp8",
"name": "Qwen3 30B A3B Instruct 2507 Fp8",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-30B-A3B-Instruct-2507-FP8",
"description": "Open source model Qwen/Qwen3-30B-A3B-Instruct-2507-FP8. 112 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 112,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3_moe",
"conversational",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-30B-A3B-Instruct-2507",
"base_model:quantized:Qwen/Qwen3-30B-A3B-Instruct-2507",
"endpoints_compatible",
"fp8",
"deploy:azure",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 21,
"context_window_tokens": 4096,
"parameters_total_b": 30,
"parameters_active_b": 30,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-coder-32b-instruct",
"name": "Qwen2.5 Coder 32B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct",
"description": "Open source model Qwen/Qwen2.5-Coder-32B-Instruct. 1995 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1995,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"code",
"codeqwen",
"chat",
"qwen",
"qwen-coder",
"conversational",
"en",
"arxiv:2409.12186",
"arxiv:2309.00071",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-Coder-32B",
"base_model:finetune:Qwen/Qwen2.5-Coder-32B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 22,
"context_window_tokens": 4096,
"parameters_total_b": 32,
"parameters_active_b": 32,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-235b-a22b-instruct-2507-fp8",
"name": "Qwen3 235B A22B Instruct 2507 Fp8",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-235B-A22B-Instruct-2507-FP8",
"description": "Open source model Qwen/Qwen3-235B-A22B-Instruct-2507-FP8. 145 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 145,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3_moe",
"conversational",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-235B-A22B-Instruct-2507",
"base_model:quantized:Qwen/Qwen3-235B-A22B-Instruct-2507",
"endpoints_compatible",
"fp8",
"deploy:azure",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 164,
"context_window_tokens": 4096,
"parameters_total_b": 235,
"parameters_active_b": 235,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "deepseek-r1-distill-qwen-7b",
"name": "Deepseek R1 Distill Qwen 7B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
"description": "Open source model deepseek-ai/DeepSeek-R1-Distill-Qwen-7B. 787 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 787,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"conversational",
"arxiv:2501.12948",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "phi-3-mini-4k-instruct",
"name": "Phi 3 Mini 4K Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct",
"description": "Open source model microsoft/Phi-3-mini-4k-instruct. 1386 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1386,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"phi3",
"nlp",
"code",
"conversational",
"custom_code",
"en",
"fr",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-14b",
"name": "Qwen3 14B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-14B",
"description": "Open source model Qwen/Qwen3-14B. 366 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 366,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2309.00071",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-14B-Base",
"base_model:finetune:Qwen/Qwen3-14B-Base",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "16GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 10,
"context_window_tokens": 4096,
"parameters_total_b": 14,
"parameters_active_b": 14,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-coder-1.5b",
"name": "Qwen2.5 Coder 1.5B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B",
"description": "Open source model Qwen/Qwen2.5-Coder-1.5B. 81 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 81,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"code",
"qwen",
"qwen-coder",
"codeqwen",
"conversational",
"en",
"arxiv:2409.12186",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-1.5B",
"base_model:finetune:Qwen/Qwen2.5-1.5B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 5,
"parameters_active_b": 5,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-3.1-70b-instruct",
"name": "Llama 3.1 70B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct",
"description": "Open source model meta-llama/Llama-3.1-70B-Instruct. 890 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 890,
"language": "Python",
"license": "llama3.1",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"facebook",
"meta",
"pytorch",
"llama-3",
"conversational",
"en",
"de",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"arxiv:2204.05149",
"base_model:meta-llama/Llama-3.1-70B",
"base_model:finetune:meta-llama/Llama-3.1-70B",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 49,
"context_window_tokens": 4096,
"parameters_total_b": 70,
"parameters_active_b": 70,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "hunyuanimage-3.0",
"name": "Hunyuanimage 3.0",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/tencent/HunyuanImage-3.0",
"description": "Open source model tencent/HunyuanImage-3.0. 640 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 640,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"hunyuan_image_3_moe",
"text-to-image",
"custom_code",
"arxiv:2509.23951",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-coder-7b-instruct-awq",
"name": "Qwen2.5 Coder 7B Instruct Awq",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct-AWQ",
"description": "Open source model Qwen/Qwen2.5-Coder-7B-Instruct-AWQ. 19 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 19,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"code",
"codeqwen",
"chat",
"qwen",
"qwen-coder",
"conversational",
"en",
"arxiv:2409.12186",
"arxiv:2309.00071",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-Coder-7B-Instruct",
"base_model:quantized:Qwen/Qwen2.5-Coder-7B-Instruct",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"awq",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-coder-30b-a3b-instruct",
"name": "Qwen3 Coder 30B A3B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-Coder-30B-A3B-Instruct",
"description": "Open source model Qwen/Qwen3-Coder-30B-A3B-Instruct. 945 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 945,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3_moe",
"conversational",
"arxiv:2505.09388",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 21,
"context_window_tokens": 4096,
"parameters_total_b": 30,
"parameters_active_b": 30,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "deepseek-r1-0528",
"name": "Deepseek R1 0528",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
"description": "Open source model deepseek-ai/DeepSeek-R1-0528. 2400 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 2400,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"deepseek_v3",
"conversational",
"custom_code",
"arxiv:2501.12948",
"eval-results",
"text-generation-inference",
"endpoints_compatible",
"fp8",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "tiny-random-llama-3",
"name": "Tiny Random Llama 3",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/llamafactory/tiny-random-Llama-3",
"description": "Open source model llamafactory/tiny-random-Llama-3. 3 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 3,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"conversational",
"text-generation-inference",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-coder-32b-instruct-awq",
"name": "Qwen2.5 Coder 32B Instruct Awq",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct-AWQ",
"description": "Open source model Qwen/Qwen2.5-Coder-32B-Instruct-AWQ. 33 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 33,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"code",
"codeqwen",
"chat",
"qwen",
"qwen-coder",
"conversational",
"en",
"arxiv:2409.12186",
"arxiv:2309.00071",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-Coder-32B-Instruct",
"base_model:quantized:Qwen/Qwen2.5-Coder-32B-Instruct",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"awq",
"deploy:azure",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 22,
"context_window_tokens": 4096,
"parameters_total_b": 32,
"parameters_active_b": 32,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "mistral-7b-instruct-v0.1",
"name": "Mistral 7B Instruct V0.1",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1",
"description": "Open source model mistralai/Mistral-7B-Instruct-v0.1. 1826 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1826,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"safetensors",
"mistral",
"finetuned",
"mistral-common",
"conversational",
"arxiv:2310.06825",
"base_model:mistralai/Mistral-7B-v0.1",
"base_model:finetune:mistralai/Mistral-7B-v0.1",
"text-generation-inference",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "gpt-oss-20b-mxfp4-q8",
"name": "Gpt Oss 20B Mxfp4 Q8",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/mlx-community/gpt-oss-20b-MXFP4-Q8",
"description": "Open source model mlx-community/gpt-oss-20b-MXFP4-Q8. 31 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 31,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"mlx",
"safetensors",
"gpt_oss",
"vllm",
"conversational",
"base_model:openai/gpt-oss-20b",
"base_model:quantized:openai/gpt-oss-20b",
"4-bit",
"region:us"
],
"hardware_req": "16GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 14,
"context_window_tokens": 4096,
"parameters_total_b": 20,
"parameters_active_b": 20,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-embedding-4b",
"name": "Qwen3 Embedding 4B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-Embedding-4B",
"description": "Open source model Qwen/Qwen3-Embedding-4B. 224 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 224,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"sentence-transformers",
"safetensors",
"qwen3",
"transformers",
"sentence-similarity",
"feature-extraction",
"text-embeddings-inference",
"arxiv:2506.05176",
"base_model:Qwen/Qwen3-4B-Base",
"base_model:finetune:Qwen/Qwen3-4B-Base",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 3,
"context_window_tokens": 4096,
"parameters_total_b": 4,
"parameters_active_b": 4,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-1.5b-instruct-awq",
"name": "Qwen2.5 1.5B Instruct Awq",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-AWQ",
"description": "Open source model Qwen/Qwen2.5-1.5B-Instruct-AWQ. 6 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 6,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"chat",
"conversational",
"en",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-1.5B-Instruct",
"base_model:quantized:Qwen/Qwen2.5-1.5B-Instruct",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"awq",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 5,
"parameters_active_b": 5,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "meta-llama-3.1-8b-instruct-fp8",
"name": "Meta Llama 3.1 8B Instruct Fp8",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8",
"description": "Open source model RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8. 44 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 44,
"language": "Python",
"license": "llama3.1",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"fp8",
"vllm",
"conversational",
"en",
"de",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"base_model:meta-llama/Llama-3.1-8B-Instruct",
"base_model:quantized:meta-llama/Llama-3.1-8B-Instruct",
"text-generation-inference",
"endpoints_compatible",
"compressed-tensors",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "phi-4",
"name": "Phi 4",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/microsoft/phi-4",
"description": "Open source model microsoft/phi-4. 2220 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 2220,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"phi3",
"phi",
"nlp",
"math",
"code",
"chat",
"conversational",
"en",
"arxiv:2412.08905",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "deepseek-r1",
"name": "Deepseek R1",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/deepseek-ai/DeepSeek-R1",
"description": "Open source model deepseek-ai/DeepSeek-R1. 13011 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 13011,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"deepseek_v3",
"conversational",
"custom_code",
"arxiv:2501.12948",
"eval-results",
"text-generation-inference",
"endpoints_compatible",
"fp8",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-3.2-1b-instruct-fp8",
"name": "Llama 3.2 1B Instruct Fp8",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/RedHatAI/Llama-3.2-1B-Instruct-FP8",
"description": "Open source model RedHatAI/Llama-3.2-1B-Instruct-FP8. 3 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 3,
"language": "Python",
"license": "llama3.2",
"tags": [
"AI",
"LLM",
"safetensors",
"llama",
"llama-3",
"neuralmagic",
"llmcompressor",
"conversational",
"en",
"de",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"base_model:meta-llama/Llama-3.2-1B-Instruct",
"base_model:quantized:meta-llama/Llama-3.2-1B-Instruct",
"compressed-tensors",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 1,
"parameters_active_b": 1,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-3.1-405b",
"name": "Llama 3.1 405B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/meta-llama/Llama-3.1-405B",
"description": "Open source model meta-llama/Llama-3.1-405B. 961 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 961,
"language": "Python",
"license": "llama3.1",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"facebook",
"meta",
"pytorch",
"llama-3",
"en",
"de",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"arxiv:2204.05149",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 284,
"context_window_tokens": 4096,
"parameters_total_b": 405,
"parameters_active_b": 405,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-4b-thinking-2507",
"name": "Qwen3 4B Thinking 2507",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-4B-Thinking-2507",
"description": "Open source model Qwen/Qwen3-4B-Thinking-2507. 548 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 548,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2505.09388",
"eval-results",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 3,
"context_window_tokens": 4096,
"parameters_total_b": 4,
"parameters_active_b": 4,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "gpt2-medium",
"name": "Gpt2 Medium",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/openai-community/gpt2-medium",
"description": "Open source model openai-community/gpt2-medium. 193 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 193,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"tf",
"jax",
"rust",
"onnx",
"safetensors",
"gpt2",
"en",
"arxiv:1910.09700",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "tiny-gpt2",
"name": "Tiny Gpt2",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/sshleifer/tiny-gpt2",
"description": "Open source model sshleifer/tiny-gpt2. 34 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 34,
"language": "Python",
"license": "unknown",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"tf",
"jax",
"gpt2",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "hermes-3-llama-3.1-8b",
"name": "Hermes 3 Llama 3.1 8B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B",
"description": "Open source model NousResearch/Hermes-3-Llama-3.1-8B. 385 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 385,
"language": "Python",
"license": "llama3",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"Llama-3",
"instruct",
"finetune",
"chatml",
"gpt4",
"synthetic data",
"distillation",
"function calling",
"json mode",
"axolotl",
"roleplaying",
"chat",
"conversational",
"en",
"arxiv:2408.11857",
"base_model:meta-llama/Llama-3.1-8B",
"base_model:finetune:meta-llama/Llama-3.1-8B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "phi-3.5-vision-instruct",
"name": "Phi 3.5 Vision Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/microsoft/Phi-3.5-vision-instruct",
"description": "Open source model microsoft/Phi-3.5-vision-instruct. 726 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 726,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"phi3_v",
"nlp",
"code",
"vision",
"image-text-to-text",
"conversational",
"custom_code",
"multilingual",
"arxiv:2404.14219",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": true
},
"referral_url": ""
},
{
"slug": "minimax-m2",
"name": "Minimax M2",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/MiniMaxAI/MiniMax-M2",
"description": "Open source model MiniMaxAI/MiniMax-M2. 1485 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1485,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"minimax_m2",
"conversational",
"custom_code",
"arxiv:2504.07164",
"arxiv:2509.06501",
"arxiv:2509.13160",
"eval-results",
"endpoints_compatible",
"fp8",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "deepseek-r1-distill-llama-8b",
"name": "Deepseek R1 Distill Llama 8B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
"description": "Open source model deepseek-ai/DeepSeek-R1-Distill-Llama-8B. 843 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 843,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"conversational",
"arxiv:2501.12948",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-14b-awq",
"name": "Qwen3 14B Awq",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-14B-AWQ",
"description": "Open source model Qwen/Qwen3-14B-AWQ. 57 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 57,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2309.00071",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-14B",
"base_model:quantized:Qwen/Qwen3-14B",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"awq",
"region:us"
],
"hardware_req": "16GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 10,
"context_window_tokens": 4096,
"parameters_total_b": 14,
"parameters_active_b": 14,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-235b-a22b",
"name": "Qwen3 235B A22B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-235B-A22B",
"description": "Open source model Qwen/Qwen3-235B-A22B. 1075 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1075,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3_moe",
"conversational",
"arxiv:2309.00071",
"arxiv:2505.09388",
"endpoints_compatible",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 164,
"context_window_tokens": 4096,
"parameters_total_b": 235,
"parameters_active_b": 235,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "meta-llama-3.1-8b-instruct-awq-int4",
"name": "Meta Llama 3.1 8B Instruct Awq Int4",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4",
"description": "Open source model hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4. 87 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 87,
"language": "Python",
"license": "llama3.1",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"llama-3.1",
"meta",
"autoawq",
"conversational",
"en",
"de",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"awq",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "lfm2.5-1.2b-instruct-mlx-8bit",
"name": "Lfm2.5 1.2B Instruct Mlx 8Bit",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/lmstudio-community/LFM2.5-1.2B-Instruct-MLX-8bit",
    "description": "Open source model lmstudio-community/LFM2.5-1.2B-Instruct-MLX-8bit. 1 like on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"lfm2",
"liquid",
"lfm2.5",
"edge",
"mlx",
"conversational",
"en",
"ar",
"zh",
"fr",
"de",
"ja",
"ko",
"es",
"base_model:LiquidAI/LFM2.5-1.2B-Instruct",
"base_model:quantized:LiquidAI/LFM2.5-1.2B-Instruct",
"endpoints_compatible",
"8-bit",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 2,
"parameters_active_b": 2,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "glm-4.7-flash-gguf",
"name": "Glm 4.7 Flash Gguf",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/unsloth/GLM-4.7-Flash-GGUF",
"description": "Open source model unsloth/GLM-4.7-Flash-GGUF. 482 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 482,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"gguf",
"unsloth",
"en",
"zh",
"arxiv:2508.06471",
"base_model:zai-org/GLM-4.7-Flash",
"base_model:quantized:zai-org/GLM-4.7-Flash",
"endpoints_compatible",
"deploy:azure",
"region:us",
"imatrix",
"conversational"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "deepseek-r1-distill-qwen-14b",
"name": "Deepseek R1 Distill Qwen 14B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
"description": "Open source model deepseek-ai/DeepSeek-R1-Distill-Qwen-14B. 603 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 603,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"conversational",
"arxiv:2501.12948",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "16GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 10,
"context_window_tokens": 4096,
"parameters_total_b": 14,
"parameters_active_b": 14,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "lfm2.5-1.2b-instruct-mlx-6bit",
"name": "Lfm2.5 1.2B Instruct Mlx 6Bit",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/lmstudio-community/LFM2.5-1.2B-Instruct-MLX-6bit",
"description": "Open source model lmstudio-community/LFM2.5-1.2B-Instruct-MLX-6bit. 4 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 4,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"lfm2",
"liquid",
"lfm2.5",
"edge",
"mlx",
"conversational",
"en",
"ar",
"zh",
"fr",
"de",
"ja",
"ko",
"es",
"base_model:LiquidAI/LFM2.5-1.2B-Instruct",
"base_model:quantized:LiquidAI/LFM2.5-1.2B-Instruct",
"endpoints_compatible",
"6-bit",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 2,
"parameters_active_b": 2,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "lfm2.5-1.2b-instruct-mlx-4bit",
"name": "Lfm2.5 1.2B Instruct Mlx 4Bit",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/lmstudio-community/LFM2.5-1.2B-Instruct-MLX-4bit",
"description": "Open source model lmstudio-community/LFM2.5-1.2B-Instruct-MLX-4bit. 1 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"lfm2",
"liquid",
"lfm2.5",
"edge",
"mlx",
"conversational",
"en",
"ar",
"zh",
"fr",
"de",
"ja",
"ko",
"es",
"base_model:LiquidAI/LFM2.5-1.2B-Instruct",
"base_model:quantized:LiquidAI/LFM2.5-1.2B-Instruct",
"endpoints_compatible",
"4-bit",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 2,
"parameters_active_b": 2,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "vicuna-7b-v1.5",
"name": "Vicuna 7B V1.5",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/lmsys/vicuna-7b-v1.5",
"description": "Open source model lmsys/vicuna-7b-v1.5. 387 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 387,
"language": "Python",
"license": "llama2",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"llama",
"arxiv:2307.09288",
"arxiv:2306.05685",
"text-generation-inference",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-3.2-1b-instruct-q8_0-gguf",
"name": "Llama 3.2 1B Instruct Q8_0 Gguf",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/hugging-quants/Llama-3.2-1B-Instruct-Q8_0-GGUF",
"description": "Open source model hugging-quants/Llama-3.2-1B-Instruct-Q8_0-GGUF. 43 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 43,
"language": "Python",
"license": "unknown",
"tags": [
"AI",
"LLM",
"gguf",
"facebook",
"meta",
"pytorch",
"llama",
"llama-3",
"llama-cpp",
"gguf-my-repo",
"en",
"de",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"base_model:meta-llama/Llama-3.2-1B-Instruct",
"base_model:quantized:meta-llama/Llama-3.2-1B-Instruct",
"endpoints_compatible",
"region:us",
"conversational"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 1,
"parameters_active_b": 1,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-3.3-70b-instruct-awq",
"name": "Llama 3.3 70B Instruct Awq",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/kosbu/Llama-3.3-70B-Instruct-AWQ",
"description": "Open source model kosbu/Llama-3.3-70B-Instruct-AWQ. 10 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 10,
"language": "Python",
"license": "llama3.3",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"facebook",
"meta",
"llama-3",
"awq",
"conversational",
"en",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"de",
"base_model:meta-llama/Llama-3.3-70B-Instruct",
"base_model:quantized:meta-llama/Llama-3.3-70B-Instruct",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"deploy:azure",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 49,
"context_window_tokens": 4096,
"parameters_total_b": 70,
"parameters_active_b": 70,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-32b-fp8",
"name": "Qwen3 32B Fp8",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-32B-FP8",
"description": "Open source model Qwen/Qwen3-32B-FP8. 80 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 80,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2309.00071",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-32B",
"base_model:quantized:Qwen/Qwen3-32B",
"text-generation-inference",
"endpoints_compatible",
"fp8",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 22,
"context_window_tokens": 4096,
"parameters_total_b": 32,
"parameters_active_b": 32,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "gpt2-xl",
"name": "Gpt2 Xl",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/openai-community/gpt2-xl",
"description": "Open source model openai-community/gpt2-xl. 373 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 373,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"tf",
"jax",
"rust",
"safetensors",
"gpt2",
"en",
"arxiv:1910.09700",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-4b-instruct-2507-fp8",
"name": "Qwen3 4B Instruct 2507 Fp8",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507-FP8",
"description": "Open source model Qwen/Qwen3-4B-Instruct-2507-FP8. 65 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 65,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-4B-Instruct-2507",
"base_model:quantized:Qwen/Qwen3-4B-Instruct-2507",
"text-generation-inference",
"endpoints_compatible",
"fp8",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 3,
"context_window_tokens": 4096,
"parameters_total_b": 4,
"parameters_active_b": 4,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "xlnet-base-cased",
"name": "Xlnet Base Cased",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/xlnet/xlnet-base-cased",
"description": "Open source model xlnet/xlnet-base-cased. 80 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 80,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"tf",
"rust",
"xlnet",
"en",
"dataset:bookcorpus",
"dataset:wikipedia",
"arxiv:1906.08237",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-2-7b-hf",
"name": "Llama 2 7B Hf",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/meta-llama/Llama-2-7b-hf",
"description": "Open source model meta-llama/Llama-2-7b-hf. 2268 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 2268,
"language": "Python",
"license": "llama2",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"safetensors",
"llama",
"facebook",
"meta",
"llama-2",
"en",
"arxiv:2307.09288",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-math-7b-instruct",
"name": "Qwen2.5 Math 7B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-Math-7B-Instruct",
"description": "Open source model Qwen/Qwen2.5-Math-7B-Instruct. 89 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 89,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"chat",
"conversational",
"en",
"arxiv:2409.12122",
"base_model:Qwen/Qwen2.5-Math-7B",
"base_model:finetune:Qwen/Qwen2.5-Math-7B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-reranker-0.6b",
"name": "Qwen3 Reranker 0.6B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-Reranker-0.6B",
"description": "Open source model Qwen/Qwen3-Reranker-0.6B. 305 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 305,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"text-ranking",
"arxiv:2506.05176",
"base_model:Qwen/Qwen3-0.6B-Base",
"base_model:finetune:Qwen/Qwen3-0.6B-Base",
"text-embeddings-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 6,
"parameters_active_b": 6,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-1.5b",
"name": "Qwen2.5 1.5B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-1.5B",
"description": "Open source model Qwen/Qwen2.5-1.5B. 165 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 165,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"conversational",
"en",
"arxiv:2407.10671",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 5,
"parameters_active_b": 5,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-30b-a3b-thinking-2507",
"name": "Qwen3 30B A3B Thinking 2507",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-30B-A3B-Thinking-2507",
"description": "Open source model Qwen/Qwen3-30B-A3B-Thinking-2507. 359 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 359,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3_moe",
"conversational",
"arxiv:2402.17463",
"arxiv:2407.02490",
"arxiv:2501.15383",
"arxiv:2404.06654",
"arxiv:2505.09388",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 21,
"context_window_tokens": 4096,
"parameters_total_b": 30,
"parameters_active_b": 30,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "smollm2-135m-instruct",
"name": "Smollm2 135M Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/HuggingFaceTB/SmolLM2-135M-Instruct",
"description": "Open source model HuggingFaceTB/SmolLM2-135M-Instruct. 292 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 292,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"tensorboard",
"onnx",
"safetensors",
"llama",
"transformers.js",
"conversational",
"en",
"arxiv:2502.02737",
"base_model:HuggingFaceTB/SmolLM2-135M",
"base_model:quantized:HuggingFaceTB/SmolLM2-135M",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-math-1.5b",
"name": "Qwen2.5 Math 1.5B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-Math-1.5B",
"description": "Open source model Qwen/Qwen2.5-Math-1.5B. 100 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 100,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"conversational",
"en",
"arxiv:2409.12122",
"base_model:Qwen/Qwen2.5-1.5B",
"base_model:finetune:Qwen/Qwen2.5-1.5B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 5,
"parameters_active_b": 5,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "glm-4.5-air-awq-4bit",
"name": "Glm 4.5 Air Awq 4Bit",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/cyankiwi/GLM-4.5-Air-AWQ-4bit",
"description": "Open source model cyankiwi/GLM-4.5-Air-AWQ-4bit. 27 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 27,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"glm4_moe",
"conversational",
"en",
"zh",
"arxiv:2508.06471",
"base_model:zai-org/GLM-4.5-Air",
"base_model:quantized:zai-org/GLM-4.5-Air",
"endpoints_compatible",
"compressed-tensors",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 3,
"context_window_tokens": 4096,
"parameters_total_b": 4,
"parameters_active_b": 4,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-2-7b-chat-hf",
"name": "Llama 2 7B Chat Hf",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/meta-llama/Llama-2-7b-chat-hf",
"description": "Open source model meta-llama/Llama-2-7b-chat-hf. 4705 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 4705,
"language": "Python",
"license": "llama2",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"safetensors",
"llama",
"facebook",
"meta",
"llama-2",
"conversational",
"en",
"arxiv:2307.09288",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-coder-7b-instruct-gptq-int4",
"name": "Qwen2.5 Coder 7B Instruct Gptq Int4",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-Int4",
"description": "Open source model Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-Int4. 12 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 12,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"code",
"codeqwen",
"chat",
"qwen",
"qwen-coder",
"conversational",
"en",
"arxiv:2409.12186",
"arxiv:2309.00071",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-Coder-7B-Instruct",
"base_model:quantized:Qwen/Qwen2.5-Coder-7B-Instruct",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"gptq",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-vl-30b-a3b-instruct-awq",
"name": "Qwen3 Vl 30B A3B Instruct Awq",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/QuantTrio/Qwen3-VL-30B-A3B-Instruct-AWQ",
"description": "Open source model QuantTrio/Qwen3-VL-30B-A3B-Instruct-AWQ. 38 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 38,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3_vl_moe",
"image-text-to-text",
"AWQ",
"vLLM",
"conversational",
"arxiv:2505.09388",
"arxiv:2502.13923",
"arxiv:2409.12191",
"arxiv:2308.12966",
"base_model:Qwen/Qwen3-VL-30B-A3B-Instruct",
"base_model:quantized:Qwen/Qwen3-VL-30B-A3B-Instruct",
"endpoints_compatible",
"4-bit",
"awq",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 21,
"context_window_tokens": 4096,
"parameters_total_b": 30,
"parameters_active_b": 30,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-8b-base",
"name": "Qwen3 8B Base",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-8B-Base",
"description": "Open source model Qwen/Qwen3-8B-Base. 82 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 82,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2505.09388",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-coder-14b-instruct",
"name": "Qwen2.5 Coder 14B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct",
"description": "Open source model Qwen/Qwen2.5-Coder-14B-Instruct. 140 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 140,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"code",
"codeqwen",
"chat",
"qwen",
"qwen-coder",
"conversational",
"en",
"arxiv:2409.12186",
"arxiv:2309.00071",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-Coder-14B",
"base_model:finetune:Qwen/Qwen2.5-Coder-14B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "16GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 10,
"context_window_tokens": 4096,
"parameters_total_b": 14,
"parameters_active_b": 14,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "stories15m_moe",
"name": "Stories15M_Moe",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/ggml-org/stories15M_MOE",
"description": "Open source model ggml-org/stories15M_MOE. 5 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 5,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"gguf",
"mixtral",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "opt-1.3b",
"name": "Opt 1.3B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/facebook/opt-1.3b",
"description": "Open source model facebook/opt-1.3b. 182 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 182,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"tf",
"jax",
"opt",
"en",
"arxiv:2205.01068",
"arxiv:2005.14165",
"text-generation-inference",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 2,
"context_window_tokens": 4096,
"parameters_total_b": 3,
"parameters_active_b": 3,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "minimax-m2-awq",
"name": "Minimax M2 Awq",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/QuantTrio/MiniMax-M2-AWQ",
"description": "Open source model QuantTrio/MiniMax-M2-AWQ. 8 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 8,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"mixtral",
"vLLM",
"AWQ",
"conversational",
"arxiv:2504.07164",
"arxiv:2509.06501",
"arxiv:2509.13160",
"base_model:MiniMaxAI/MiniMax-M2",
"base_model:quantized:MiniMaxAI/MiniMax-M2",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"awq",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "glm-4.7-flash-nvfp4",
"name": "Glm 4.7 Flash Nvfp4",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/GadflyII/GLM-4.7-Flash-NVFP4",
"description": "Open source model GadflyII/GLM-4.7-Flash-NVFP4. 62 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 62,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"glm4_moe_lite",
"moe",
"nvfp4",
"quantized",
"vllm",
"glm",
"30b",
"conversational",
"en",
"zh",
"base_model:zai-org/GLM-4.7-Flash",
"base_model:quantized:zai-org/GLM-4.7-Flash",
"endpoints_compatible",
"compressed-tensors",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "hy-mt1.5-7b",
"name": "Hy Mt1.5 7B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/tencent/HY-MT1.5-7B",
"description": "Open source model tencent/HY-MT1.5-7B. 133 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 133,
"language": "Python",
"license": "unknown",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"hunyuan_v1_dense",
"translation",
"zh",
"en",
"fr",
"pt",
"es",
"ja",
"tr",
"ru",
"ar",
"ko",
"th",
"it",
"de",
"vi",
"ms",
"id",
"tl",
"hi",
"pl",
"cs",
"nl",
"km",
"my",
"fa",
"gu",
"ur",
"te",
"mr",
"he",
"bn",
"ta",
"uk",
"bo",
"kk",
"mn",
"ug",
"arxiv:2512.24092",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "gemma-2-27b-it",
"name": "Gemma 2 27B It",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/google/gemma-2-27b-it",
"description": "Open source model google/gemma-2-27b-it. 559 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 559,
"language": "Python",
"license": "gemma",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"gemma2",
"conversational",
"arxiv:2009.03300",
"arxiv:1905.07830",
"arxiv:1911.11641",
"arxiv:1904.09728",
"arxiv:1905.10044",
"arxiv:1907.10641",
"arxiv:1811.00937",
"arxiv:1809.02789",
"arxiv:1911.01547",
"arxiv:1705.03551",
"arxiv:2107.03374",
"arxiv:2108.07732",
"arxiv:2110.14168",
"arxiv:2009.11462",
"arxiv:2101.11718",
"arxiv:2110.08193",
"arxiv:1804.09301",
"arxiv:2109.07958",
"arxiv:1804.06876",
"arxiv:2103.03874",
"arxiv:2304.06364",
"arxiv:2206.04615",
"arxiv:2203.09509",
"base_model:google/gemma-2-27b",
"base_model:finetune:google/gemma-2-27b",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 19,
"context_window_tokens": 4096,
"parameters_total_b": 27,
"parameters_active_b": 27,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-coder-next-gguf",
"name": "Qwen3 Coder Next Gguf",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/unsloth/Qwen3-Coder-Next-GGUF",
"description": "Open source model unsloth/Qwen3-Coder-Next-GGUF. 347 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 347,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"gguf",
"qwen3_next",
"unsloth",
"qwen",
"qwen3",
"base_model:Qwen/Qwen3-Coder-Next",
"base_model:quantized:Qwen/Qwen3-Coder-Next",
"endpoints_compatible",
"region:us",
"imatrix",
"conversational"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "gte-qwen2-1.5b-instruct",
"name": "Gte Qwen2 1.5B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct",
"description": "Open source model Alibaba-NLP/gte-Qwen2-1.5B-instruct. 229 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 229,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"sentence-transformers",
"safetensors",
"qwen2",
"mteb",
"transformers",
"Qwen2",
"sentence-similarity",
"custom_code",
"arxiv:2308.03281",
"model-index",
"text-embeddings-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 5,
"parameters_active_b": 5,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "lfm2-1.2b",
"name": "Lfm2 1.2B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/LiquidAI/LFM2-1.2B",
"description": "Open source model LiquidAI/LFM2-1.2B. 349 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 349,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"lfm2",
"liquid",
"edge",
"conversational",
"en",
"ar",
"zh",
"fr",
"de",
"ja",
"ko",
"es",
"arxiv:2511.23404",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 2,
"parameters_active_b": 2,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "saiga_llama3_8b",
"name": "Saiga_Llama3_8B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/IlyaGusev/saiga_llama3_8b",
"description": "Open source model IlyaGusev/saiga_llama3_8b. 137 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 137,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"conversational",
"ru",
"dataset:IlyaGusev/saiga_scored",
"doi:10.57967/hf/2368",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-1.7b-base",
"name": "Qwen3 1.7B Base",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-1.7B-Base",
"description": "Open source model Qwen/Qwen3-1.7B-Base. 62 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 62,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2505.09388",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "mistral-7b-v0.3-bnb-4bit",
"name": "Mistral 7B V0.3 Bnb 4Bit",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/unsloth/mistral-7b-v0.3-bnb-4bit",
"description": "Open source model unsloth/mistral-7b-v0.3-bnb-4bit. 22 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 22,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"mistral",
"unsloth",
"mistral-7b",
"en",
"base_model:mistralai/Mistral-7B-v0.3",
"base_model:quantized:mistralai/Mistral-7B-v0.3",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"bitsandbytes",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "gemma-2-2b-it",
"name": "Gemma 2 2B It",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/google/gemma-2-2b-it",
"description": "Open source model google/gemma-2-2b-it. 1285 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1285,
"language": "Python",
"license": "gemma",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"gemma2",
"conversational",
"arxiv:2009.03300",
"arxiv:1905.07830",
"arxiv:1911.11641",
"arxiv:1904.09728",
"arxiv:1905.10044",
"arxiv:1907.10641",
"arxiv:1811.00937",
"arxiv:1809.02789",
"arxiv:1911.01547",
"arxiv:1705.03551",
"arxiv:2107.03374",
"arxiv:2108.07732",
"arxiv:2110.14168",
"arxiv:2009.11462",
"arxiv:2101.11718",
"arxiv:2110.08193",
"arxiv:1804.09301",
"arxiv:2109.07958",
"arxiv:1804.06876",
"arxiv:2103.03874",
"arxiv:2304.06364",
"arxiv:1903.00161",
"arxiv:2206.04615",
"arxiv:2203.09509",
"arxiv:2403.13793",
"base_model:google/gemma-2-2b",
"base_model:finetune:google/gemma-2-2b",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 2,
"parameters_active_b": 2,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "phi-4-multimodal-instruct",
"name": "Phi 4 Multimodal Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/microsoft/Phi-4-multimodal-instruct",
"description": "Open source model microsoft/Phi-4-multimodal-instruct. 1573 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1573,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"phi4mm",
"nlp",
"code",
"audio",
"automatic-speech-recognition",
"speech-summarization",
"speech-translation",
"visual-question-answering",
"phi-4-multimodal",
"phi",
"phi-4-mini",
"custom_code",
"multilingual",
"ar",
"zh",
"cs",
"da",
"nl",
"en",
"fi",
"fr",
"de",
"he",
"hu",
"it",
"ja",
"ko",
"no",
"pl",
"pt",
"ru",
"es",
"sv",
"th",
"tr",
"uk",
"arxiv:2503.01743",
"arxiv:2407.13833",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "pythia-70m-deduped",
"name": "Pythia 70M Deduped",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/EleutherAI/pythia-70m-deduped",
"description": "Open source model EleutherAI/pythia-70m-deduped. 27 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 27,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"safetensors",
"gpt_neox",
"causal-lm",
"pythia",
"en",
"dataset:EleutherAI/the_pile_deduplicated",
"arxiv:2304.01373",
"arxiv:2101.00027",
"arxiv:2201.07311",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "dialogpt-medium",
"name": "Dialogpt Medium",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/microsoft/DialoGPT-medium",
"description": "Open source model microsoft/DialoGPT-medium. 433 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 433,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"tf",
"jax",
"rust",
"gpt2",
"conversational",
"arxiv:1911.00536",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "gpt-oss-20b-bf16",
"name": "Gpt Oss 20B Bf16",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/unsloth/gpt-oss-20b-BF16",
"description": "Open source model unsloth/gpt-oss-20b-BF16. 29 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 29,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"gpt_oss",
"vllm",
"unsloth",
"conversational",
"base_model:openai/gpt-oss-20b",
"base_model:finetune:openai/gpt-oss-20b",
"endpoints_compatible",
"region:us"
],
"hardware_req": "16GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 14,
"context_window_tokens": 4096,
"parameters_total_b": 20,
"parameters_active_b": 20,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-72b-instruct",
"name": "Qwen2.5 72B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
"description": "Open source model Qwen/Qwen2.5-72B-Instruct. 910 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 910,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"chat",
"conversational",
"en",
"arxiv:2309.00071",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-72B",
"base_model:finetune:Qwen/Qwen2.5-72B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 50,
"context_window_tokens": 4096,
"parameters_total_b": 72,
"parameters_active_b": 72,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-32b-awq",
"name": "Qwen3 32B Awq",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-32B-AWQ",
"description": "Open source model Qwen/Qwen3-32B-AWQ. 125 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 125,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2309.00071",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-32B",
"base_model:quantized:Qwen/Qwen3-32B",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"awq",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 22,
"context_window_tokens": 4096,
"parameters_total_b": 32,
"parameters_active_b": 32,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "mimo-v2-flash",
"name": "Mimo V2 Flash",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/XiaomiMiMo/MiMo-V2-Flash",
"description": "Open source model XiaomiMiMo/MiMo-V2-Flash. 628 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 628,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"mimo_v2_flash",
"conversational",
"custom_code",
"eval-results",
"fp8",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-coder-30b-a3b-instruct-fp8",
"name": "Qwen3 Coder 30B A3B Instruct Fp8",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8",
"description": "Open source model Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8. 158 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 158,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3_moe",
"conversational",
"arxiv:2505.09388",
"endpoints_compatible",
"fp8",
"deploy:azure",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 21,
"context_window_tokens": 4096,
"parameters_total_b": 30,
"parameters_active_b": 30,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-8b-fp8",
"name": "Qwen3 8B Fp8",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-8B-FP8",
"description": "Open source model Qwen/Qwen3-8B-FP8. 56 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 56,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2309.00071",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-8B",
"base_model:quantized:Qwen/Qwen3-8B",
"text-generation-inference",
"endpoints_compatible",
"fp8",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "deepseek-v3.2",
"name": "Deepseek V3.2",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/deepseek-ai/DeepSeek-V3.2",
"description": "Open source model deepseek-ai/DeepSeek-V3.2. 1251 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1251,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"deepseek_v32",
"conversational",
"base_model:deepseek-ai/DeepSeek-V3.2-Exp-Base",
"base_model:finetune:deepseek-ai/DeepSeek-V3.2-Exp-Base",
"eval-results",
"endpoints_compatible",
"fp8",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-coder-next",
"name": "Qwen3 Coder Next",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-Coder-Next",
"description": "Open source model Qwen/Qwen3-Coder-Next. 912 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 912,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3_next",
"conversational",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2-0.5b",
"name": "Qwen2 0.5B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2-0.5B",
"description": "Open source model Qwen/Qwen2-0.5B. 164 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 164,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"pretrained",
"conversational",
"en",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 5,
"parameters_active_b": 5,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "mistral-7b-v0.1",
"name": "Mistral 7B V0.1",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/mistralai/Mistral-7B-v0.1",
"description": "Open source model mistralai/Mistral-7B-v0.1. 4042 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 4042,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"safetensors",
"mistral",
"pretrained",
"mistral-common",
"en",
"arxiv:2310.06825",
"text-generation-inference",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "kimi-k2-thinking",
"name": "Kimi K2 Thinking",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/moonshotai/Kimi-K2-Thinking",
"description": "Open source model moonshotai/Kimi-K2-Thinking. 1670 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1670,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"kimi_k2",
"conversational",
"custom_code",
"eval-results",
"endpoints_compatible",
"compressed-tensors",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "deepseek-r1-0528-qwen3-8b-mlx-4bit",
"name": "Deepseek R1 0528 Qwen3 8B Mlx 4Bit",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/lmstudio-community/DeepSeek-R1-0528-Qwen3-8B-MLX-4bit",
"description": "Open source model lmstudio-community/DeepSeek-R1-0528-Qwen3-8B-MLX-4bit. 7 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 7,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"mlx",
"safetensors",
"qwen3",
"conversational",
"base_model:deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
"base_model:quantized:deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
"4-bit",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-7b-instruct-awq",
"name": "Qwen2.5 7B Instruct Awq",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct-AWQ",
"description": "Open source model Qwen/Qwen2.5-7B-Instruct-AWQ. 36 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 36,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"chat",
"conversational",
"en",
"arxiv:2309.00071",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-7B-Instruct",
"base_model:quantized:Qwen/Qwen2.5-7B-Instruct",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"awq",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "points-reader",
"name": "Points Reader",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/tencent/POINTS-Reader",
"description": "Open source model tencent/POINTS-Reader. 100 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 100,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"image-text-to-text",
"conversational",
"custom_code",
"arxiv:2509.01215",
"arxiv:2412.08443",
"arxiv:2409.04828",
"arxiv:2405.11850",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-4b-base",
"name": "Qwen3 4B Base",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-4B-Base",
"description": "Open source model Qwen/Qwen3-4B-Base. 80 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 80,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2505.09388",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 3,
"context_window_tokens": 4096,
"parameters_total_b": 4,
"parameters_active_b": 4,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "step-3.5-flash",
"name": "Step 3.5 Flash",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/stepfun-ai/Step-3.5-Flash",
"description": "Open source model stepfun-ai/Step-3.5-Flash. 621 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 621,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"step3p5",
"conversational",
"custom_code",
"arxiv:2602.10604",
"arxiv:2601.05593",
"arxiv:2507.19427",
"eval-results",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "kogpt2-base-v2",
"name": "Kogpt2 Base V2",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/skt/kogpt2-base-v2",
"description": "Open source model skt/kogpt2-base-v2. 60 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 60,
"language": "Python",
"license": "cc-by-nc-sa-4.0",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"jax",
"gpt2",
"ko",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "parler-tts-mini-multilingual-v1.1",
"name": "Parler Tts Mini Multilingual V1.1",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/parler-tts/parler-tts-mini-multilingual-v1.1",
"description": "Open source model parler-tts/parler-tts-mini-multilingual-v1.1. 54 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 54,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"parler_tts",
"text-to-speech",
"annotation",
"en",
"fr",
"es",
"pt",
"pl",
"de",
"nl",
"it",
"dataset:facebook/multilingual_librispeech",
"dataset:parler-tts/libritts_r_filtered",
"dataset:parler-tts/libritts-r-filtered-speaker-descriptions",
"dataset:parler-tts/mls_eng",
"dataset:parler-tts/mls-eng-speaker-descriptions",
"dataset:ylacombe/mls-annotated",
"dataset:ylacombe/cml-tts-filtered-annotated",
"dataset:PHBJT/cml-tts-filtered",
"arxiv:2402.01912",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-reranker-8b",
"name": "Qwen3 Reranker 8B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-Reranker-8B",
"description": "Open source model Qwen/Qwen3-Reranker-8B. 213 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 213,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"text-ranking",
"arxiv:2506.05176",
"base_model:Qwen/Qwen3-8B-Base",
"base_model:finetune:Qwen/Qwen3-8B-Base",
"text-embeddings-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "deepseek-r1-0528-qwen3-8b-mlx-8bit",
"name": "Deepseek R1 0528 Qwen3 8B Mlx 8Bit",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/lmstudio-community/DeepSeek-R1-0528-Qwen3-8B-MLX-8bit",
"description": "Open source model lmstudio-community/DeepSeek-R1-0528-Qwen3-8B-MLX-8bit. 13 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 13,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"mlx",
"safetensors",
"qwen3",
"conversational",
"base_model:deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
"base_model:quantized:deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
"8-bit",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "powermoe-3b",
"name": "Powermoe 3B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/ibm-research/PowerMoE-3b",
"description": "Open source model ibm-research/PowerMoE-3b. 14 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 14,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"granitemoe",
"arxiv:2408.13359",
"model-index",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 2,
"context_window_tokens": 4096,
"parameters_total_b": 3,
"parameters_active_b": 3,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llada-8b-instruct",
"name": "Llada 8B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/GSAI-ML/LLaDA-8B-Instruct",
"description": "Open source model GSAI-ML/LLaDA-8B-Instruct. 342 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 342,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llada",
"conversational",
"custom_code",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "apertus-8b-instruct-2509",
"name": "Apertus 8B Instruct 2509",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/swiss-ai/Apertus-8B-Instruct-2509",
"description": "Open source model swiss-ai/Apertus-8B-Instruct-2509. 435 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 435,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"apertus",
"multilingual",
"compliant",
"swiss-ai",
"conversational",
"arxiv:2509.14233",
"base_model:swiss-ai/Apertus-8B-2509",
"base_model:finetune:swiss-ai/Apertus-8B-2509",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-30b-a3b-gptq-int4",
"name": "Qwen3 30B A3B Gptq Int4",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-30B-A3B-GPTQ-Int4",
"description": "Open source model Qwen/Qwen3-30B-A3B-GPTQ-Int4. 45 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 45,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3_moe",
"conversational",
"arxiv:2309.00071",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-30B-A3B",
"base_model:quantized:Qwen/Qwen3-30B-A3B",
"endpoints_compatible",
"4-bit",
"gptq",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 21,
"context_window_tokens": 4096,
"parameters_total_b": 30,
"parameters_active_b": 30,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "tinyllama-1.1b-chat-v0.3-gptq",
"name": "Tinyllama 1.1B Chat V0.3 Gptq",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GPTQ",
"description": "Open source model TheBloke/TinyLlama-1.1B-Chat-v0.3-GPTQ. 9 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 9,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"en",
"dataset:cerebras/SlimPajama-627B",
"dataset:bigcode/starcoderdata",
"dataset:OpenAssistant/oasst_top1_2023-08-25",
"base_model:TinyLlama/TinyLlama-1.1B-Chat-v0.3",
"base_model:quantized:TinyLlama/TinyLlama-1.1B-Chat-v0.3",
"text-generation-inference",
"4-bit",
"gptq",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 1,
"parameters_active_b": 1,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "prot_t5_xl_bfd",
"name": "Prot_T5_Xl_Bfd",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Rostlab/prot_t5_xl_bfd",
"description": "Open source model Rostlab/prot_t5_xl_bfd. 10 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 10,
"language": "Python",
"license": "unknown",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"tf",
"t5",
"protein language model",
"dataset:BFD",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-4b-instruct-2507-unsloth-bnb-4bit",
"name": "Qwen3 4B Instruct 2507 Unsloth Bnb 4Bit",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/unsloth/Qwen3-4B-Instruct-2507-unsloth-bnb-4bit",
"description": "Open source model unsloth/Qwen3-4B-Instruct-2507-unsloth-bnb-4bit. 13 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 13,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"unsloth",
"conversational",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-4B-Instruct-2507",
"base_model:quantized:Qwen/Qwen3-4B-Instruct-2507",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"bitsandbytes",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 3,
"context_window_tokens": 4096,
"parameters_total_b": 4,
"parameters_active_b": 4,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "phi-3.5-mini-instruct",
"name": "Phi 3.5 Mini Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/microsoft/Phi-3.5-mini-instruct",
"description": "Open source model microsoft/Phi-3.5-mini-instruct. 963 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 963,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"phi3",
"nlp",
"code",
"conversational",
"custom_code",
"multilingual",
"arxiv:2404.14219",
"arxiv:2407.13833",
"arxiv:2403.06412",
"eval-results",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "meta-llama-3.1-8b-instruct-bnb-4bit",
"name": "Meta Llama 3.1 8B Instruct Bnb 4Bit",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
"description": "Open source model unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit. 95 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 95,
"language": "Python",
"license": "llama3.1",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"llama-3",
"meta",
"facebook",
"unsloth",
"conversational",
"en",
"arxiv:2204.05149",
"base_model:meta-llama/Llama-3.1-8B-Instruct",
"base_model:quantized:meta-llama/Llama-3.1-8B-Instruct",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"bitsandbytes",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "glm-4.7-flash-awq-4bit",
"name": "Glm 4.7 Flash Awq 4Bit",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/cyankiwi/GLM-4.7-Flash-AWQ-4bit",
"description": "Open source model cyankiwi/GLM-4.7-Flash-AWQ-4bit. 43 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 43,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"glm4_moe_lite",
"conversational",
"en",
"zh",
"arxiv:2508.06471",
"base_model:zai-org/GLM-4.7-Flash",
"base_model:quantized:zai-org/GLM-4.7-Flash",
"endpoints_compatible",
"compressed-tensors",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 3,
"context_window_tokens": 4096,
"parameters_total_b": 4,
"parameters_active_b": 4,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "dots.ocr",
"name": "Dots.Ocr",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/rednote-hilab/dots.ocr",
"description": "Open source model rednote-hilab/dots.ocr. 1243 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1243,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"dots_ocr",
"safetensors",
"image-to-text",
"ocr",
"document-parse",
"layout",
"table",
"formula",
"transformers",
"custom_code",
"image-text-to-text",
"conversational",
"en",
"zh",
"multilingual",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "mistral-7b-bnb-4bit",
"name": "Mistral 7B Bnb 4Bit",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/unsloth/mistral-7b-bnb-4bit",
"description": "Open source model unsloth/mistral-7b-bnb-4bit. 30 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 30,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"mistral",
"unsloth",
"mistral-7b",
"bnb",
"en",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"bitsandbytes",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "glm-5-fp8",
"name": "Glm 5 Fp8",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/zai-org/GLM-5-FP8",
"description": "Open source model zai-org/GLM-5-FP8. 108 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 108,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"glm_moe_dsa",
"conversational",
"en",
"zh",
"endpoints_compatible",
"fp8",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen-7b",
"name": "Qwen 7B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen-7B",
"description": "Open source model Qwen/Qwen-7B. 395 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 395,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen",
"custom_code",
"zh",
"en",
"arxiv:2309.16609",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwq-32b-awq",
"name": "Qwq 32B Awq",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/QwQ-32B-AWQ",
"description": "Open source model Qwen/QwQ-32B-AWQ. 133 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 133,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"safetensors",
"qwen2",
"chat",
"conversational",
"en",
"arxiv:2309.00071",
"arxiv:2412.15115",
"base_model:Qwen/QwQ-32B",
"base_model:quantized:Qwen/QwQ-32B",
"4-bit",
"awq",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 22,
"context_window_tokens": 4096,
"parameters_total_b": 32,
"parameters_active_b": 32,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "deepseek-r1-distill-llama-70b",
"name": "Deepseek R1 Distill Llama 70B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
"description": "Open source model deepseek-ai/DeepSeek-R1-Distill-Llama-70B. 741 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 741,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"conversational",
"arxiv:2501.12948",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 49,
"context_window_tokens": 4096,
"parameters_total_b": 70,
"parameters_active_b": 70,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-coder-7b",
"name": "Qwen2.5 Coder 7B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B",
"description": "Open source model Qwen/Qwen2.5-Coder-7B. 134 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 134,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"code",
"qwen",
"qwen-coder",
"codeqwen",
"conversational",
"en",
"arxiv:2409.12186",
"arxiv:2309.00071",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-7B",
"base_model:finetune:Qwen/Qwen2.5-7B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-3b",
"name": "Qwen2.5 3B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-3B",
"description": "Open source model Qwen/Qwen2.5-3B. 169 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 169,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"safetensors",
"qwen2",
"conversational",
"en",
"arxiv:2407.10671",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 2,
"context_window_tokens": 4096,
"parameters_total_b": 3,
"parameters_active_b": 3,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "deepseek-v2-lite-chat",
"name": "Deepseek V2 Lite Chat",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/deepseek-ai/DeepSeek-V2-Lite-Chat",
"description": "Open source model deepseek-ai/DeepSeek-V2-Lite-Chat. 133 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 133,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"deepseek_v2",
"conversational",
"custom_code",
"arxiv:2405.04434",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "tiny-qwen3forcausallm",
"name": "Tiny Qwen3Forcausallm",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/trl-internal-testing/tiny-Qwen3ForCausalLM",
"description": "Open source model trl-internal-testing/tiny-Qwen3ForCausalLM. 1 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1,
"language": "Python",
"license": "unknown",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"trl",
"conversational",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "deepseek-coder-v2-lite-instruct",
"name": "Deepseek Coder V2 Lite Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
"description": "Open source model deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct. 539 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 539,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"deepseek_v2",
"conversational",
"custom_code",
"arxiv:2401.06066",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-0.6b-base",
"name": "Qwen3 0.6B Base",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-0.6B-Base",
"description": "Open source model Qwen/Qwen3-0.6B-Base. 146 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 146,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2505.09388",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 6,
"parameters_active_b": 6,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "diffractgpt_mistral_chemical_formula",
"name": "Diffractgpt_Mistral_Chemical_Formula",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/knc6/diffractgpt_mistral_chemical_formula",
"description": "Open source model knc6/diffractgpt_mistral_chemical_formula. 1 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"peft",
"safetensors",
"chemistry",
"text-generation-inference",
"atomgpt",
"diffraction",
"en",
"base_model:unsloth/mistral-7b-bnb-4bit",
"base_model:adapter:unsloth/mistral-7b-bnb-4bit",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen-7b-chat",
"name": "Qwen 7B Chat",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen-7B-Chat",
"description": "Open source model Qwen/Qwen-7B-Chat. 787 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 787,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen",
"custom_code",
"zh",
"en",
"arxiv:2309.16609",
"arxiv:2305.08322",
"arxiv:2009.03300",
"arxiv:2305.05280",
"arxiv:2210.03629",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "nvidia-nemotron-3-nano-30b-a3b-nvfp4",
"name": "Nvidia Nemotron 3 Nano 30B A3B Nvfp4",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4",
"description": "Open source model nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4. 100 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 100,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"nemotron_h",
"feature-extraction",
"nvidia",
"pytorch",
"conversational",
"custom_code",
"en",
"es",
"fr",
"de",
"ja",
"it",
"dataset:nvidia/Nemotron-Pretraining-Code-v1",
"dataset:nvidia/Nemotron-CC-v2",
"dataset:nvidia/Nemotron-Pretraining-SFT-v1",
"dataset:nvidia/Nemotron-CC-Math-v1",
"dataset:nvidia/Nemotron-Pretraining-Code-v2",
"dataset:nvidia/Nemotron-Pretraining-Specialized-v1",
"dataset:nvidia/Nemotron-CC-v2.1",
"dataset:nvidia/Nemotron-CC-Code-v1",
"dataset:nvidia/Nemotron-Pretraining-Dataset-sample",
"dataset:nvidia/Nemotron-Competitive-Programming-v1",
"dataset:nvidia/Nemotron-Math-v2",
"dataset:nvidia/Nemotron-Agentic-v1",
"dataset:nvidia/Nemotron-Math-Proofs-v1",
"dataset:nvidia/Nemotron-Instruction-Following-Chat-v1",
"dataset:nvidia/Nemotron-Science-v1",
"dataset:nvidia/Nemotron-3-Nano-RL-Training-Blend",
"arxiv:2512.20848",
"arxiv:2512.20856",
"arxiv:2601.20088",
"base_model:nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
"base_model:quantized:nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 21,
"context_window_tokens": 4096,
"parameters_total_b": 30,
"parameters_active_b": 30,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "falcon-h1-tiny-90m-instruct",
"name": "Falcon H1 Tiny 90M Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/tiiuae/Falcon-H1-Tiny-90M-Instruct",
"description": "Open source model tiiuae/Falcon-H1-Tiny-90M-Instruct. 31 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 31,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"falcon_h1",
"falcon-h1",
"edge",
"conversational",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "hermes-3-llama-3.2-3b",
"name": "Hermes 3 Llama 3.2 3B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/NousResearch/Hermes-3-Llama-3.2-3B",
"description": "Open source model NousResearch/Hermes-3-Llama-3.2-3B. 174 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 174,
"language": "Python",
"license": "llama3",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"Llama-3",
"instruct",
"finetune",
"chatml",
"gpt4",
"synthetic data",
"distillation",
"function calling",
"json mode",
"axolotl",
"roleplaying",
"chat",
"conversational",
"en",
"arxiv:2408.11857",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 2,
"context_window_tokens": 4096,
"parameters_total_b": 3,
"parameters_active_b": 3,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "meta-llama-3.1-8b-instruct",
"name": "Meta Llama 3.1 8B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/unsloth/Meta-Llama-3.1-8B-Instruct",
"description": "Open source model unsloth/Meta-Llama-3.1-8B-Instruct. 94 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 94,
"language": "Python",
"license": "llama3.1",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"llama-3",
"meta",
"facebook",
"unsloth",
"conversational",
"en",
"base_model:meta-llama/Llama-3.1-8B-Instruct",
"base_model:finetune:meta-llama/Llama-3.1-8B-Instruct",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "meta-llama-3.1-8b-instruct-gguf",
"name": "Meta Llama 3.1 8B Instruct Gguf",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
"description": "Open source model bartowski/Meta-Llama-3.1-8B-Instruct-GGUF. 321 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 321,
"language": "Python",
"license": "llama3.1",
"tags": [
"AI",
"LLM",
"gguf",
"facebook",
"meta",
"pytorch",
"llama",
"llama-3",
"en",
"de",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"base_model:meta-llama/Llama-3.1-8B-Instruct",
"base_model:quantized:meta-llama/Llama-3.1-8B-Instruct",
"endpoints_compatible",
"region:us",
"conversational"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "deepseek-v3-0324",
"name": "Deepseek V3 0324",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/deepseek-ai/DeepSeek-V3-0324",
"description": "Open source model deepseek-ai/DeepSeek-V3-0324. 3087 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 3087,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"deepseek_v3",
"conversational",
"custom_code",
"arxiv:2412.19437",
"eval-results",
"text-generation-inference",
"endpoints_compatible",
"fp8",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "elm",
"name": "Elm",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Joaoffg/ELM",
"description": "Open source model Joaoffg/ELM. 2 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 2,
"language": "Python",
"license": "llama2",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"academic",
"university",
"en",
"nl",
"arxiv:2408.06931",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-2-13b-chat-hf",
"name": "Llama 2 13B Chat Hf",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/meta-llama/Llama-2-13b-chat-hf",
"description": "Open source model meta-llama/Llama-2-13b-chat-hf. 1109 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1109,
"language": "Python",
"license": "llama2",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"safetensors",
"llama",
"facebook",
"meta",
"llama-2",
"conversational",
"en",
"arxiv:2307.09288",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "16GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 9,
"context_window_tokens": 4096,
"parameters_total_b": 13,
"parameters_active_b": 13,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "svara-tts-v1",
"name": "Svara Tts V1",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/kenpath/svara-tts-v1",
"description": "Open source model kenpath/svara-tts-v1. 18 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 18,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"text-to-speech",
"speech-synthesis",
"multilingual",
"indic",
"orpheus",
"lora",
"low-latency",
"gguf",
"zero-shot",
"emotions",
"discrete-audio-tokens",
"hi",
"bn",
"mr",
"te",
"kn",
"bho",
"mag",
"hne",
"mai",
"as",
"brx",
"doi",
"gu",
"ml",
"pa",
"ta",
"ne",
"sa",
"en",
"dataset:SYSPIN",
"dataset:RASA",
"dataset:IndicTTS",
"dataset:SPICOR",
"base_model:canopylabs/3b-hi-ft-research_release",
"base_model:adapter:canopylabs/3b-hi-ft-research_release",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
}
]