[ { "slug": "qwen2.5-7b-instruct", "name": "Qwen2.5 7B Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct", "description": "Open source model Qwen/Qwen2.5-7B-Instruct. 1073 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 1073, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "chat", "conversational", "en", "arxiv:2309.00071", "arxiv:2407.10671", "base_model:Qwen/Qwen2.5-7B", "base_model:finetune:Qwen/Qwen2.5-7B", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 5, "context_window_tokens": 4096, "parameters_total_b": 7, "parameters_active_b": 7, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-0.6b", "name": "Qwen3 0.6B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-0.6B", "description": "Open source model Qwen/Qwen3-0.6B. 1083 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 1083, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3", "conversational", "arxiv:2505.09388", "base_model:Qwen/Qwen3-0.6B-Base", "base_model:finetune:Qwen/Qwen3-0.6B-Base", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 4, "context_window_tokens": 4096, "parameters_total_b": 6, "parameters_active_b": 6, "is_multimodal": false }, "referral_url": "" }, { "slug": "gpt2", "name": "Gpt2", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/openai-community/gpt2", "description": "Open source model openai-community/gpt2. 3114 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 3114, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "pytorch", "tf", "jax", "tflite", "rust", "onnx", "safetensors", "gpt2", "exbert", "en", "doi:10.57967/hf/0039", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen2.5-1.5b-instruct", "name": "Qwen2.5 1.5B Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct", "description": "Open source model Qwen/Qwen2.5-1.5B-Instruct. 617 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 617, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "chat", "conversational", "en", "arxiv:2407.10671", "base_model:Qwen/Qwen2.5-1.5B", "base_model:finetune:Qwen/Qwen2.5-1.5B", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 4, "context_window_tokens": 4096, "parameters_total_b": 5, "parameters_active_b": 5, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen2.5-3b-instruct", "name": "Qwen2.5 3B Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen2.5-3B-Instruct", "description": "Open source model Qwen/Qwen2.5-3B-Instruct. 404 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 404, "language": "Python", "license": "other", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "chat", "conversational", "en", "arxiv:2407.10671", "base_model:Qwen/Qwen2.5-3B", "base_model:finetune:Qwen/Qwen2.5-3B", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 2, "context_window_tokens": 4096, "parameters_total_b": 3, "parameters_active_b": 3, "is_multimodal": false }, "referral_url": "" }, { "slug": "llama-3.1-8b-instruct", "name": "Llama 3.1 8B Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct", "description": "Open source model meta-llama/Llama-3.1-8B-Instruct. 5467 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 5467, "language": "Python", "license": "llama3.1", "tags": [ "AI", "LLM", "transformers", "safetensors", "llama", "facebook", "meta", "pytorch", "llama-3", "conversational", "en", "de", "fr", "it", "pt", "hi", "es", "th", "arxiv:2204.05149", "base_model:meta-llama/Llama-3.1-8B", "base_model:finetune:meta-llama/Llama-3.1-8B", "eval-results", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 6, "context_window_tokens": 4096, "parameters_total_b": 8, "parameters_active_b": 8, "is_multimodal": false }, "referral_url": "" }, { "slug": "gpt-oss-20b", "name": "Gpt Oss 20B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/openai/gpt-oss-20b", "description": "Open source model openai/gpt-oss-20b. 4378 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 4378, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "gpt_oss", "vllm", "conversational", "arxiv:2508.10925", "endpoints_compatible", "8-bit", "mxfp4", "deploy:azure", "region:us" ], "hardware_req": "16GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 14, "context_window_tokens": 4096, "parameters_total_b": 20, "parameters_active_b": 20, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen2.5-0.5b-instruct", "name": "Qwen2.5 0.5B Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct", "description": "Open source model Qwen/Qwen2.5-0.5B-Instruct. 463 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 463, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "chat", "conversational", "en", "arxiv:2407.10671", "base_model:Qwen/Qwen2.5-0.5B", "base_model:finetune:Qwen/Qwen2.5-0.5B", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 4, "context_window_tokens": 4096, "parameters_total_b": 5, "parameters_active_b": 5, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-4b", "name": "Qwen3 4B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-4B", "description": "Open source model Qwen/Qwen3-4B. 552 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 552, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3", "conversational", "arxiv:2309.00071", "arxiv:2505.09388", "base_model:Qwen/Qwen3-4B-Base", "base_model:finetune:Qwen/Qwen3-4B-Base", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 3, "context_window_tokens": 4096, "parameters_total_b": 4, "parameters_active_b": 4, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-8b", "name": "Qwen3 8B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-8B", "description": "Open source model Qwen/Qwen3-8B. 940 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 940, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3", "conversational", "arxiv:2309.00071", "arxiv:2505.09388", "base_model:Qwen/Qwen3-8B-Base", "base_model:finetune:Qwen/Qwen3-8B-Base", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 6, "context_window_tokens": 4096, "parameters_total_b": 8, "parameters_active_b": 8, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen2.5-32b-instruct", "name": "Qwen2.5 32B Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen2.5-32B-Instruct", "description": "Open source model Qwen/Qwen2.5-32B-Instruct. 328 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 328, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "chat", "conversational", "en", "arxiv:2309.00071", "arxiv:2407.10671", "base_model:Qwen/Qwen2.5-32B", "base_model:finetune:Qwen/Qwen2.5-32B", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 22, "context_window_tokens": 4096, "parameters_total_b": 32, "parameters_active_b": 32, "is_multimodal": false }, "referral_url": "" }, { "slug": "opt-125m", "name": "Opt 125M", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/facebook/opt-125m", "description": "Open source model facebook/opt-125m. 233 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 233, "language": "Python", "license": "other", "tags": [ "AI", "LLM", "transformers", "pytorch", "tf", "jax", "opt", "en", "arxiv:2205.01068", "arxiv:2005.14165", "text-generation-inference", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-1.7b", "name": "Qwen3 1.7B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-1.7B", "description": "Open source model Qwen/Qwen3-1.7B. 422 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 422, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3", "conversational", "arxiv:2505.09388", "base_model:Qwen/Qwen3-1.7B-Base", "base_model:finetune:Qwen/Qwen3-1.7B-Base", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 5, "context_window_tokens": 4096, "parameters_total_b": 7, "parameters_active_b": 7, "is_multimodal": false }, "referral_url": "" }, { "slug": "tiny-qwen2forcausallm-2.5", "name": "Tiny Qwen2Forcausallm 2.5", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", "description": "Open source model trl-internal-testing/tiny-Qwen2ForCausalLM-2.5. 3 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 3, "language": "Python", "license": "unknown", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "trl", "conversational", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "dolphin-2.9.1-yi-1.5-34b", "name": "Dolphin 2.9.1 Yi 1.5 34B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/dphn/dolphin-2.9.1-yi-1.5-34b", "description": "Open source model dphn/dolphin-2.9.1-yi-1.5-34b. 54 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 54, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "llama", "generated_from_trainer", "axolotl", "conversational", "dataset:cognitivecomputations/Dolphin-2.9", "dataset:teknium/OpenHermes-2.5", "dataset:m-a-p/CodeFeedback-Filtered-Instruction", "dataset:cognitivecomputations/dolphin-coder", "dataset:cognitivecomputations/samantha-data", "dataset:microsoft/orca-math-word-problems-200k", "dataset:Locutusque/function-calling-chatml", "dataset:internlm/Agent-FLAN", "base_model:01-ai/Yi-1.5-34B", "base_model:finetune:01-ai/Yi-1.5-34B", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 24, "context_window_tokens": 4096, "parameters_total_b": 34, "parameters_active_b": 34, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-embedding-0.6b", "name": "Qwen3 Embedding 0.6B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-Embedding-0.6B", "description": "Open source model Qwen/Qwen3-Embedding-0.6B. 879 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 879, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "sentence-transformers", "safetensors", "qwen3", "transformers", "sentence-similarity", "feature-extraction", "text-embeddings-inference", "arxiv:2506.05176", "base_model:Qwen/Qwen3-0.6B-Base", "base_model:finetune:Qwen/Qwen3-0.6B-Base", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 4, "context_window_tokens": 4096, "parameters_total_b": 6, "parameters_active_b": 6, "is_multimodal": false }, "referral_url": "" }, { "slug": "gpt-oss-120b", "name": "Gpt Oss 120B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/openai/gpt-oss-120b", "description": "Open source model openai/gpt-oss-120b. 4503 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 4503, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "gpt_oss", "vllm", "conversational", "arxiv:2508.10925", "endpoints_compatible", "8-bit", "mxfp4", "deploy:azure", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 84, "context_window_tokens": 4096, "parameters_total_b": 120, "parameters_active_b": 120, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-4b-instruct-2507", "name": "Qwen3 4B Instruct 2507", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507", "description": "Open source model Qwen/Qwen3-4B-Instruct-2507. 730 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 730, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3", "conversational", "arxiv:2505.09388", "eval-results", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 3, "context_window_tokens": 4096, "parameters_total_b": 4, "parameters_active_b": 4, "is_multimodal": false }, "referral_url": "" }, { "slug": "moondream2", "name": "Moondream2", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/vikhyatk/moondream2", "description": "Open source model vikhyatk/moondream2. 1373 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 1373, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "moondream1", "image-text-to-text", "custom_code", "doi:10.57967/hf/6762", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "llama-3.2-1b-instruct", "name": "Llama 3.2 1B Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct", "description": "Open source model meta-llama/Llama-3.2-1B-Instruct. 1292 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 1292, "language": "Python", "license": "llama3.2", "tags": [ "AI", "LLM", "transformers", "safetensors", "llama", "facebook", "meta", "pytorch", "llama-3", "conversational", "en", "de", "fr", "it", "pt", "hi", "es", "th", "arxiv:2204.05149", "arxiv:2405.16406", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 1, "parameters_active_b": 1, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen2-1.5b-instruct", "name": "Qwen2 1.5B Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen2-1.5B-Instruct", "description": "Open source model Qwen/Qwen2-1.5B-Instruct. 158 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 158, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "chat", "conversational", "en", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 4, "context_window_tokens": 4096, "parameters_total_b": 5, "parameters_active_b": 5, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen2.5-coder-0.5b-instruct", "name": "Qwen2.5 Coder 0.5B Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B-Instruct", "description": "Open source model Qwen/Qwen2.5-Coder-0.5B-Instruct. 64 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 64, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "code", "codeqwen", "chat", "qwen", "qwen-coder", "conversational", "en", "arxiv:2409.12186", "arxiv:2407.10671", "base_model:Qwen/Qwen2.5-Coder-0.5B", "base_model:finetune:Qwen/Qwen2.5-Coder-0.5B", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 4, "context_window_tokens": 4096, "parameters_total_b": 5, "parameters_active_b": 5, "is_multimodal": false }, "referral_url": "" }, { "slug": "kimi-k2.5", "name": "Kimi K2.5", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/mlx-community/Kimi-K2.5", "description": "Open source model mlx-community/Kimi-K2.5. 28 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 28, "language": "Python", "license": "other", "tags": [ "AI", "LLM", "mlx", "safetensors", "kimi_k25", "conversational", "custom_code", "base_model:moonshotai/Kimi-K2.5", "base_model:quantized:moonshotai/Kimi-K2.5", "4-bit", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "mistral-7b-instruct-v0.2", "name": "Mistral 7B Instruct V0.2", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2", "description": "Open source model mistralai/Mistral-7B-Instruct-v0.2. 3075 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 3075, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "pytorch", "safetensors", "mistral", "finetuned", "mistral-common", "conversational", "arxiv:2310.06825", "text-generation-inference", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 5, "context_window_tokens": 4096, "parameters_total_b": 7, "parameters_active_b": 7, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-30b-a3b-instruct-2507", "name": "Qwen3 30B A3B Instruct 2507", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-30B-A3B-Instruct-2507", "description": "Open source model Qwen/Qwen3-30B-A3B-Instruct-2507. 766 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 766, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3_moe", "conversational", "arxiv:2402.17463", "arxiv:2407.02490", "arxiv:2501.15383", "arxiv:2404.06654", "arxiv:2505.09388", "eval-results", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 21, "context_window_tokens": 4096, "parameters_total_b": 30, "parameters_active_b": 30, "is_multimodal": false }, "referral_url": "" }, { "slug": "llm-jp-3-3.7b-instruct", "name": "Llm Jp 3 3.7B Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/llm-jp/llm-jp-3-3.7b-instruct", "description": "Open source model llm-jp/llm-jp-3-3.7b-instruct. 13 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 13, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "llama", "conversational", "en", "ja", "text-generation-inference", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 5, "context_window_tokens": 4096, "parameters_total_b": 7, "parameters_active_b": 7, "is_multimodal": false }, "referral_url": "" }, { "slug": "llama-3.2-3b-instruct", "name": "Llama 3.2 3B Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct", "description": "Open source model meta-llama/Llama-3.2-3B-Instruct. 1986 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 1986, "language": "Python", "license": "llama3.2", "tags": [ "AI", "LLM", "transformers", "safetensors", "llama", "facebook", "meta", "pytorch", "llama-3", "conversational", "en", "de", "fr", "it", "pt", "hi", "es", "th", "arxiv:2204.05149", "arxiv:2405.16406", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 2, "context_window_tokens": 4096, "parameters_total_b": 3, "parameters_active_b": 3, "is_multimodal": false }, "referral_url": "" }, { "slug": "distilgpt2", "name": "Distilgpt2", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/distilbert/distilgpt2", "description": "Open source model distilbert/distilgpt2. 609 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 609, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "pytorch", "tf", "jax", "tflite", "rust", "coreml", "safetensors", "gpt2", "exbert", "en", "dataset:openwebtext", "arxiv:1910.01108", "arxiv:2201.08542", "arxiv:2203.12574", "arxiv:1910.09700", "arxiv:1503.02531", "model-index", "co2_eq_emissions", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-embedding-8b", "name": "Qwen3 Embedding 8B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-Embedding-8B", "description": "Open source model Qwen/Qwen3-Embedding-8B. 584 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 584, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "sentence-transformers", "safetensors", "qwen3", "transformers", "sentence-similarity", "feature-extraction", "text-embeddings-inference", "arxiv:2506.05176", "base_model:Qwen/Qwen3-8B-Base", "base_model:finetune:Qwen/Qwen3-8B-Base", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 6, "context_window_tokens": 4096, "parameters_total_b": 8, "parameters_active_b": 8, "is_multimodal": false }, "referral_url": "" }, { "slug": "meta-llama-3-8b", "name": "Meta Llama 3 8B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/meta-llama/Meta-Llama-3-8B", "description": "Open source model meta-llama/Meta-Llama-3-8B. 6458 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 6458, "language": "Python", "license": "llama3", "tags": [ "AI", "LLM", "transformers", "safetensors", "llama", "facebook", "meta", "pytorch", "llama-3", "en", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 6, "context_window_tokens": 4096, "parameters_total_b": 8, "parameters_active_b": 8, "is_multimodal": false }, "referral_url": "" }, { "slug": "tinyllama-1.1b-chat-v1.0", "name": "Tinyllama 1.1B Chat V1.0", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0", "description": "Open source model TinyLlama/TinyLlama-1.1B-Chat-v1.0. 1526 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 1526, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "llama", "conversational", "en", "dataset:cerebras/SlimPajama-627B", "dataset:bigcode/starcoderdata", "dataset:HuggingFaceH4/ultrachat_200k", "dataset:HuggingFaceH4/ultrafeedback_binarized", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 1, "parameters_active_b": 1, "is_multimodal": false }, "referral_url": "" }, { "slug": "glm-4.7-flash", "name": "Glm 4.7 Flash", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/zai-org/GLM-4.7-Flash", "description": "Open source model zai-org/GLM-4.7-Flash. 1538 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 1538, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "safetensors", "glm4_moe_lite", "conversational", "en", "zh", "arxiv:2508.06471", "eval-results", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "llama-3.2-1b", "name": "Llama 3.2 1B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/meta-llama/Llama-3.2-1B", "description": "Open source model meta-llama/Llama-3.2-1B. 2295 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 2295, "language": "Python", "license": "llama3.2", "tags": [ "AI", "LLM", "transformers", "safetensors", "llama", "facebook", "meta", "pytorch", "llama-3", "en", "de", "fr", "it", "pt", "hi", "es", "th", "arxiv:2204.05149", "arxiv:2405.16406", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 1, "parameters_active_b": 1, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-32b", "name": "Qwen3 32B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-32B", "description": "Open source model Qwen/Qwen3-32B. 656 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 656, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3", "conversational", "arxiv:2309.00071", "arxiv:2505.09388", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 22, "context_window_tokens": 4096, "parameters_total_b": 32, "parameters_active_b": 32, "is_multimodal": false }, "referral_url": "" }, { "slug": "llama-3.2-1b-instruct-fp8-dynamic", "name": "Llama 3.2 1B Instruct Fp8 Dynamic", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/RedHatAI/Llama-3.2-1B-Instruct-FP8-dynamic", "description": "Open source model RedHatAI/Llama-3.2-1B-Instruct-FP8-dynamic. 3 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 3, "language": "Python", "license": "llama3.2", "tags": [ "AI", "LLM", "safetensors", "llama", "fp8", "vllm", "conversational", "en", "de", "fr", "it", "pt", "hi", "es", "th", "base_model:meta-llama/Llama-3.2-1B-Instruct", "base_model:quantized:meta-llama/Llama-3.2-1B-Instruct", "compressed-tensors", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 1, "parameters_active_b": 1, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen2.5-coder-1.5b-instruct", "name": "Qwen2.5 Coder 1.5B Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct", "description": "Open source model Qwen/Qwen2.5-Coder-1.5B-Instruct. 106 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 106, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "code", "codeqwen", "chat", "qwen", "qwen-coder", "conversational", "en", "arxiv:2409.12186", "arxiv:2407.10671", "base_model:Qwen/Qwen2.5-Coder-1.5B", "base_model:finetune:Qwen/Qwen2.5-Coder-1.5B", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 4, "context_window_tokens": 4096, "parameters_total_b": 5, "parameters_active_b": 5, "is_multimodal": false }, "referral_url": "" }, { "slug": "meta-llama-3-8b-instruct", "name": "Meta Llama 3 8B Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct", "description": "Open source model meta-llama/Meta-Llama-3-8B-Instruct. 4380 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 4380, "language": "Python", "license": "llama3", "tags": [ "AI", "LLM", "transformers", "safetensors", "llama", "facebook", "meta", "pytorch", "llama-3", "conversational", "en", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 6, "context_window_tokens": 4096, "parameters_total_b": 8, "parameters_active_b": 8, "is_multimodal": false }, "referral_url": "" }, { "slug": "gemma-3-1b-it", "name": "Gemma 3 1B It", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/google/gemma-3-1b-it", "description": "Open source model google/gemma-3-1b-it. 842 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 842, "language": "Python", "license": "gemma", "tags": [ "AI", "LLM", "transformers", "safetensors", "gemma3_text", "conversational", "arxiv:1905.07830", "arxiv:1905.10044", "arxiv:1911.11641", "arxiv:1904.09728", "arxiv:1705.03551", "arxiv:1911.01547", "arxiv:1907.10641", "arxiv:1903.00161", "arxiv:2009.03300", "arxiv:2304.06364", "arxiv:2103.03874", "arxiv:2110.14168", "arxiv:2311.12022", "arxiv:2108.07732", "arxiv:2107.03374", "arxiv:2210.03057", "arxiv:2106.03193", "arxiv:1910.11856", "arxiv:2502.12404", "arxiv:2502.21228", "arxiv:2404.16816", "arxiv:2104.12756", "arxiv:2311.16502", "arxiv:2203.10244", "arxiv:2404.12390", "arxiv:1810.12440", "arxiv:1908.02660", "arxiv:2312.11805", "base_model:google/gemma-3-1b-pt", "base_model:finetune:google/gemma-3-1b-pt", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 1, "parameters_active_b": 1, "is_multimodal": false }, "referral_url": "" }, { "slug": "phi-2", "name": "Phi 2", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/microsoft/phi-2", "description": "Open source model microsoft/phi-2. 3425 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 3425, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "safetensors", "phi", "nlp", "code", "en", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen2.5-coder-7b-instruct", "name": "Qwen2.5 Coder 7B Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct", "description": "Open source model Qwen/Qwen2.5-Coder-7B-Instruct. 646 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 646, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "code", "codeqwen", "chat", "qwen", "qwen-coder", "conversational", "en", "arxiv:2409.12186", "arxiv:2309.00071", "arxiv:2407.10671", "base_model:Qwen/Qwen2.5-Coder-7B", "base_model:finetune:Qwen/Qwen2.5-Coder-7B", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 5, "context_window_tokens": 4096, "parameters_total_b": 7, "parameters_active_b": 7, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen2.5-7b", "name": "Qwen2.5 7B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen2.5-7B", "description": "Open source model Qwen/Qwen2.5-7B. 
264 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 264, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "conversational", "en", "arxiv:2407.10671", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 5, "context_window_tokens": 4096, "parameters_total_b": 7, "parameters_active_b": 7, "is_multimodal": false }, "referral_url": "" }, { "slug": "deepseek-r1-distill-qwen-1.5b", "name": "Deepseek R1 Distill Qwen 1.5B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "description": "Open source model deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B. 1446 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 1446, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "conversational", "arxiv:2501.12948", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 4, "context_window_tokens": 4096, "parameters_total_b": 5, "parameters_active_b": 5, "is_multimodal": false }, "referral_url": "" }, { "slug": "deepseek-v3", "name": "Deepseek V3", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/deepseek-ai/DeepSeek-V3", "description": "Open source model deepseek-ai/DeepSeek-V3. 
4024 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 4024, "language": "Python", "license": "unknown", "tags": [ "AI", "LLM", "transformers", "safetensors", "deepseek_v3", "conversational", "custom_code", "arxiv:2412.19437", "eval-results", "text-generation-inference", "endpoints_compatible", "fp8", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "gpt2-large", "name": "Gpt2 Large", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/openai-community/gpt2-large", "description": "Open source model openai-community/gpt2-large. 344 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 344, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "pytorch", "tf", "jax", "rust", "onnx", "safetensors", "gpt2", "en", "arxiv:1910.09700", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "glm-4.7-flash-mlx-8bit", "name": "Glm 4.7 Flash Mlx 8Bit", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/lmstudio-community/GLM-4.7-Flash-MLX-8bit", "description": "Open source model lmstudio-community/GLM-4.7-Flash-MLX-8bit. 
9 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 9, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "safetensors", "glm4_moe_lite", "mlx", "conversational", "en", "zh", "base_model:zai-org/GLM-4.7-Flash", "base_model:quantized:zai-org/GLM-4.7-Flash", "endpoints_compatible", "8-bit", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 6, "context_window_tokens": 4096, "parameters_total_b": 8, "parameters_active_b": 8, "is_multimodal": false }, "referral_url": "" }, { "slug": "glm-4.7-flash-mlx-6bit", "name": "Glm 4.7 Flash Mlx 6Bit", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/lmstudio-community/GLM-4.7-Flash-MLX-6bit", "description": "Open source model lmstudio-community/GLM-4.7-Flash-MLX-6bit. 7 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 7, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "safetensors", "glm4_moe_lite", "mlx", "conversational", "en", "zh", "base_model:zai-org/GLM-4.7-Flash", "base_model:quantized:zai-org/GLM-4.7-Flash", "endpoints_compatible", "6-bit", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 4, "context_window_tokens": 4096, "parameters_total_b": 6, "parameters_active_b": 6, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-0.6b-fp8", "name": "Qwen3 0.6B Fp8", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-0.6B-FP8", "description": "Open source model Qwen/Qwen3-0.6B-FP8. 
56 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 56, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3", "conversational", "arxiv:2505.09388", "base_model:Qwen/Qwen3-0.6B", "base_model:quantized:Qwen/Qwen3-0.6B", "text-generation-inference", "endpoints_compatible", "fp8", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 4, "context_window_tokens": 4096, "parameters_total_b": 6, "parameters_active_b": 6, "is_multimodal": false }, "referral_url": "" }, { "slug": "llama-3.1-8b", "name": "Llama 3.1 8B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/meta-llama/Llama-3.1-8B", "description": "Open source model meta-llama/Llama-3.1-8B. 2065 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 2065, "language": "Python", "license": "llama3.1", "tags": [ "AI", "LLM", "transformers", "safetensors", "llama", "facebook", "meta", "pytorch", "llama-3", "en", "de", "fr", "it", "pt", "hi", "es", "th", "arxiv:2204.05149", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 6, "context_window_tokens": 4096, "parameters_total_b": 8, "parameters_active_b": 8, "is_multimodal": false }, "referral_url": "" }, { "slug": "pythia-160m", "name": "Pythia 160M", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/EleutherAI/pythia-160m", "description": "Open source model EleutherAI/pythia-160m. 
38 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 38, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "pytorch", "safetensors", "gpt_neox", "causal-lm", "pythia", "en", "dataset:EleutherAI/pile", "arxiv:2304.01373", "arxiv:2101.00027", "arxiv:2201.07311", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "deepseek-r1-distill-qwen-32b", "name": "Deepseek R1 Distill Qwen 32B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", "description": "Open source model deepseek-ai/DeepSeek-R1-Distill-Qwen-32B. 1517 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 1517, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "conversational", "arxiv:2501.12948", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 22, "context_window_tokens": 4096, "parameters_total_b": 32, "parameters_active_b": 32, "is_multimodal": false }, "referral_url": "" }, { "slug": "hunyuanocr", "name": "Hunyuanocr", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/tencent/HunyuanOCR", "description": "Open source model tencent/HunyuanOCR. 
553 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 553, "language": "Python", "license": "other", "tags": [ "AI", "LLM", "transformers", "safetensors", "hunyuan_vl", "ocr", "hunyuan", "vision-language", "image-to-text", "1B", "end-to-end", "image-text-to-text", "conversational", "multilingual", "arxiv:2511.19575", "base_model:tencent/HunyuanOCR", "base_model:finetune:tencent/HunyuanOCR", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": true }, "referral_url": "" }, { "slug": "qwen3-30b-a3b", "name": "Qwen3 30B A3B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-30B-A3B", "description": "Open source model Qwen/Qwen3-30B-A3B. 855 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 855, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3_moe", "conversational", "arxiv:2309.00071", "arxiv:2505.09388", "base_model:Qwen/Qwen3-30B-A3B-Base", "base_model:finetune:Qwen/Qwen3-30B-A3B-Base", "endpoints_compatible", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 21, "context_window_tokens": 4096, "parameters_total_b": 30, "parameters_active_b": 3, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen2.5-0.5b", "name": "Qwen2.5 0.5B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen2.5-0.5B", "description": "Open source model Qwen/Qwen2.5-0.5B. 
372 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 372, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "conversational", "en", "arxiv:2407.10671", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 4, "context_window_tokens": 4096, "parameters_total_b": 5, "parameters_active_b": 5, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen2.5-32b-instruct-awq", "name": "Qwen2.5 32B Instruct Awq", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen2.5-32B-Instruct-AWQ", "description": "Open source model Qwen/Qwen2.5-32B-Instruct-AWQ. 94 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 94, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "chat", "conversational", "en", "arxiv:2309.00071", "arxiv:2407.10671", "base_model:Qwen/Qwen2.5-32B-Instruct", "base_model:quantized:Qwen/Qwen2.5-32B-Instruct", "text-generation-inference", "endpoints_compatible", "4-bit", "awq", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 22, "context_window_tokens": 4096, "parameters_total_b": 32, "parameters_active_b": 32, "is_multimodal": false }, "referral_url": "" }, { "slug": "nvidia-nemotron-3-nano-30b-a3b-fp8", "name": "Nvidia Nemotron 3 Nano 30B A3B Fp8", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8", "description": "Open source model nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8. 
284 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 284, "language": "Python", "license": "other", "tags": [ "AI", "LLM", "transformers", "safetensors", "nemotron_h", "feature-extraction", "nvidia", "pytorch", "conversational", "custom_code", "en", "es", "fr", "de", "ja", "it", "dataset:nvidia/Nemotron-Pretraining-Code-v1", "dataset:nvidia/Nemotron-CC-v2", "dataset:nvidia/Nemotron-Pretraining-SFT-v1", "dataset:nvidia/Nemotron-CC-Math-v1", "dataset:nvidia/Nemotron-Pretraining-Code-v2", "dataset:nvidia/Nemotron-Pretraining-Specialized-v1", "dataset:nvidia/Nemotron-CC-v2.1", "dataset:nvidia/Nemotron-CC-Code-v1", "dataset:nvidia/Nemotron-Pretraining-Dataset-sample", "dataset:nvidia/Nemotron-Competitive-Programming-v1", "dataset:nvidia/Nemotron-Math-v2", "dataset:nvidia/Nemotron-Agentic-v1", "dataset:nvidia/Nemotron-Math-Proofs-v1", "dataset:nvidia/Nemotron-Instruction-Following-Chat-v1", "dataset:nvidia/Nemotron-Science-v1", "dataset:nvidia/Nemotron-3-Nano-RL-Training-Blend", "arxiv:2512.20848", "arxiv:2512.20856", "base_model:nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "base_model:quantized:nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "eval-results", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 21, "context_window_tokens": 4096, "parameters_total_b": 30, "parameters_active_b": 30, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen2.5-14b-instruct", "name": "Qwen2.5 14B Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen2.5-14B-Instruct", "description": "Open source model Qwen/Qwen2.5-14B-Instruct. 
312 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 312, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "chat", "conversational", "en", "arxiv:2309.00071", "arxiv:2407.10671", "base_model:Qwen/Qwen2.5-14B", "base_model:finetune:Qwen/Qwen2.5-14B", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "16GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 10, "context_window_tokens": 4096, "parameters_total_b": 14, "parameters_active_b": 14, "is_multimodal": false }, "referral_url": "" }, { "slug": "nvidia-nemotron-3-nano-30b-a3b-bf16", "name": "Nvidia Nemotron 3 Nano 30B A3B Bf16", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "description": "Open source model nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16. 634 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 634, "language": "Python", "license": "other", "tags": [ "AI", "LLM", "transformers", "safetensors", "nemotron_h", "feature-extraction", "nvidia", "pytorch", "conversational", "custom_code", "en", "es", "fr", "de", "ja", "it", "dataset:nvidia/Nemotron-Pretraining-Code-v1", "dataset:nvidia/Nemotron-CC-v2", "dataset:nvidia/Nemotron-Pretraining-SFT-v1", "dataset:nvidia/Nemotron-CC-Math-v1", "dataset:nvidia/Nemotron-Pretraining-Code-v2", "dataset:nvidia/Nemotron-Pretraining-Specialized-v1", "dataset:nvidia/Nemotron-CC-v2.1", "dataset:nvidia/Nemotron-CC-Code-v1", "dataset:nvidia/Nemotron-Pretraining-Dataset-sample", "dataset:nvidia/Nemotron-Competitive-Programming-v1", "dataset:nvidia/Nemotron-Math-v2", "dataset:nvidia/Nemotron-Agentic-v1", "dataset:nvidia/Nemotron-Math-Proofs-v1", "dataset:nvidia/Nemotron-Instruction-Following-Chat-v1", "dataset:nvidia/Nemotron-Science-v1", 
"dataset:nvidia/Nemotron-3-Nano-RL-Training-Blend", "arxiv:2512.20848", "arxiv:2512.20856", "eval-results", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 21, "context_window_tokens": 4096, "parameters_total_b": 30, "parameters_active_b": 30, "is_multimodal": false }, "referral_url": "" }, { "slug": "openelm-1_1b-instruct", "name": "Openelm 1_1B Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/apple/OpenELM-1_1B-Instruct", "description": "Open source model apple/OpenELM-1_1B-Instruct. 72 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 72, "language": "Python", "license": "apple-amlr", "tags": [ "AI", "LLM", "transformers", "safetensors", "openelm", "custom_code", "arxiv:2404.14619", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 1, "parameters_active_b": 1, "is_multimodal": false }, "referral_url": "" }, { "slug": "tiny-random-llamaforcausallm", "name": "Tiny Random Llamaforcausallm", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/hmellor/tiny-random-LlamaForCausalLM", "description": "Open source model hmellor/tiny-random-LlamaForCausalLM. 
0 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 0, "language": "Python", "license": "unknown", "tags": [ "AI", "LLM", "transformers", "safetensors", "llama", "conversational", "arxiv:1910.09700", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-next-80b-a3b-instruct", "name": "Qwen3 Next 80B A3B Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-Next-80B-A3B-Instruct", "description": "Open source model Qwen/Qwen3-Next-80B-A3B-Instruct. 937 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 937, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3_next", "conversational", "arxiv:2309.00071", "arxiv:2404.06654", "arxiv:2505.09388", "arxiv:2501.15383", "eval-results", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 56, "context_window_tokens": 4096, "parameters_total_b": 80, "parameters_active_b": 3, "is_multimodal": false }, "referral_url": "" }, { "slug": "h2ovl-mississippi-800m", "name": "H2Ovl Mississippi 800M", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/h2oai/h2ovl-mississippi-800m", "description": "Open source model h2oai/h2ovl-mississippi-800m. 
39 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 39, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "h2ovl_chat", "feature-extraction", "gpt", "llm", "multimodal large language model", "ocr", "conversational", "custom_code", "en", "arxiv:2410.13611", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "bloomz-560m", "name": "Bloomz 560M", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/bigscience/bloomz-560m", "description": "Open source model bigscience/bloomz-560m. 137 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 137, "language": "Python", "license": "bigscience-bloom-rail-1.0", "tags": [ "AI", "LLM", "transformers", "pytorch", "tensorboard", "safetensors", "bloom", "ak", "ar", "as", "bm", "bn", "ca", "code", "en", "es", "eu", "fon", "fr", "gu", "hi", "id", "ig", "ki", "kn", "lg", "ln", "ml", "mr", "ne", "nso", "ny", "or", "pa", "pt", "rn", "rw", "sn", "st", "sw", "ta", "te", "tn", "ts", "tum", "tw", "ur", "vi", "wo", "xh", "yo", "zh", "zu", "dataset:bigscience/xP3", "arxiv:2211.01786", "model-index", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen2.5-1.5b-quantized.w8a8", "name": "Qwen2.5 1.5B Quantized.W8A8", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/RedHatAI/Qwen2.5-1.5B-quantized.w8a8", "description": "Open source model 
RedHatAI/Qwen2.5-1.5B-quantized.w8a8. 2 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 2, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "safetensors", "qwen2", "chat", "neuralmagic", "llmcompressor", "conversational", "en", "base_model:Qwen/Qwen2.5-1.5B", "base_model:quantized:Qwen/Qwen2.5-1.5B", "8-bit", "compressed-tensors", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 4, "context_window_tokens": 4096, "parameters_total_b": 5, "parameters_active_b": 5, "is_multimodal": false }, "referral_url": "" }, { "slug": "h2ovl-mississippi-2b", "name": "H2Ovl Mississippi 2B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/h2oai/h2ovl-mississippi-2b", "description": "Open source model h2oai/h2ovl-mississippi-2b. 40 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 40, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "h2ovl_chat", "feature-extraction", "gpt", "llm", "multimodal large language model", "ocr", "conversational", "custom_code", "en", "arxiv:2410.13611", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 2, "parameters_active_b": 2, "is_multimodal": false }, "referral_url": "" }, { "slug": "llava-v1.5-7b", "name": "Llava V1.5 7B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/liuhaotian/llava-v1.5-7b", "description": "Open source model liuhaotian/llava-v1.5-7b. 
537 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 537, "language": "Python", "license": "unknown", "tags": [ "AI", "LLM", "transformers", "pytorch", "llava", "image-text-to-text", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 5, "context_window_tokens": 4096, "parameters_total_b": 7, "parameters_active_b": 7, "is_multimodal": true }, "referral_url": "" }, { "slug": "t5-3b", "name": "T5 3B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/google-t5/t5-3b", "description": "Open source model google-t5/t5-3b. 51 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 51, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "pytorch", "tf", "safetensors", "t5", "summarization", "translation", "en", "fr", "ro", "de", "multilingual", "dataset:c4", "arxiv:1805.12471", "arxiv:1708.00055", "arxiv:1704.05426", "arxiv:1606.05250", "arxiv:1808.09121", "arxiv:1810.12885", "arxiv:1905.10044", "arxiv:1910.09700", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 2, "context_window_tokens": 4096, "parameters_total_b": 3, "parameters_active_b": 3, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen2.5-14b-instruct-awq", "name": "Qwen2.5 14B Instruct Awq", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-AWQ", "description": "Open source model Qwen/Qwen2.5-14B-Instruct-AWQ. 
27 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 27, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "chat", "conversational", "en", "arxiv:2309.00071", "arxiv:2407.10671", "base_model:Qwen/Qwen2.5-14B-Instruct", "base_model:quantized:Qwen/Qwen2.5-14B-Instruct", "text-generation-inference", "endpoints_compatible", "4-bit", "awq", "region:us" ], "hardware_req": "16GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 10, "context_window_tokens": 4096, "parameters_total_b": 14, "parameters_active_b": 14, "is_multimodal": false }, "referral_url": "" }, { "slug": "llama-3.2-3b", "name": "Llama 3.2 3B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/meta-llama/Llama-3.2-3B", "description": "Open source model meta-llama/Llama-3.2-3B. 697 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 697, "language": "Python", "license": "llama3.2", "tags": [ "AI", "LLM", "transformers", "safetensors", "llama", "facebook", "meta", "pytorch", "llama-3", "en", "de", "fr", "it", "pt", "hi", "es", "th", "arxiv:2204.05149", "arxiv:2405.16406", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 2, "context_window_tokens": 4096, "parameters_total_b": 3, "parameters_active_b": 3, "is_multimodal": false }, "referral_url": "" }, { "slug": "phi-3-mini-4k-instruct-gptq-4bit", "name": "Phi 3 Mini 4K Instruct Gptq 4Bit", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/kaitchup/Phi-3-mini-4k-instruct-gptq-4bit", "description": "Open source model kaitchup/Phi-3-mini-4k-instruct-gptq-4bit. 
2 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 2, "language": "Python", "license": "unknown", "tags": [ "AI", "LLM", "transformers", "safetensors", "phi3", "conversational", "custom_code", "arxiv:1910.09700", "text-generation-inference", "endpoints_compatible", "4-bit", "gptq", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 3, "context_window_tokens": 4096, "parameters_total_b": 4, "parameters_active_b": 4, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen2.5-72b-instruct-awq", "name": "Qwen2.5 72B Instruct Awq", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct-AWQ", "description": "Open source model Qwen/Qwen2.5-72B-Instruct-AWQ. 74 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 74, "language": "Python", "license": "other", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "chat", "conversational", "en", "arxiv:2309.00071", "arxiv:2407.10671", "base_model:Qwen/Qwen2.5-72B-Instruct", "base_model:quantized:Qwen/Qwen2.5-72B-Instruct", "text-generation-inference", "endpoints_compatible", "4-bit", "awq", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 50, "context_window_tokens": 4096, "parameters_total_b": 72, "parameters_active_b": 72, "is_multimodal": false }, "referral_url": "" }, { "slug": "smollm2-135m", "name": "Smollm2 135M", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/HuggingFaceTB/SmolLM2-135M", "description": "Open source model HuggingFaceTB/SmolLM2-135M. 
166 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 166, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "llama", "en", "arxiv:2502.02737", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "llama-3.3-70b-instruct", "name": "Llama 3.3 70B Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct", "description": "Open source model meta-llama/Llama-3.3-70B-Instruct. 2658 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 2658, "language": "Python", "license": "llama3.3", "tags": [ "AI", "LLM", "transformers", "safetensors", "llama", "facebook", "meta", "pytorch", "llama-3", "conversational", "en", "fr", "it", "pt", "hi", "es", "th", "de", "arxiv:2204.05149", "base_model:meta-llama/Llama-3.1-70B", "base_model:finetune:meta-llama/Llama-3.1-70B", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 49, "context_window_tokens": 4096, "parameters_total_b": 70, "parameters_active_b": 70, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-30b-a3b-instruct-2507-fp8", "name": "Qwen3 30B A3B Instruct 2507 Fp8", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", "description": "Open source model Qwen/Qwen3-30B-A3B-Instruct-2507-FP8. 
112 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 112, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3_moe", "conversational", "arxiv:2505.09388", "base_model:Qwen/Qwen3-30B-A3B-Instruct-2507", "base_model:quantized:Qwen/Qwen3-30B-A3B-Instruct-2507", "endpoints_compatible", "fp8", "deploy:azure", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 21, "context_window_tokens": 4096, "parameters_total_b": 30, "parameters_active_b": 30, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen2.5-coder-32b-instruct", "name": "Qwen2.5 Coder 32B Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct", "description": "Open source model Qwen/Qwen2.5-Coder-32B-Instruct. 1995 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 1995, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "code", "codeqwen", "chat", "qwen", "qwen-coder", "conversational", "en", "arxiv:2409.12186", "arxiv:2309.00071", "arxiv:2407.10671", "base_model:Qwen/Qwen2.5-Coder-32B", "base_model:finetune:Qwen/Qwen2.5-Coder-32B", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 22, "context_window_tokens": 4096, "parameters_total_b": 32, "parameters_active_b": 32, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-235b-a22b-instruct-2507-fp8", "name": "Qwen3 235B A22B Instruct 2507 Fp8", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-235B-A22B-Instruct-2507-FP8", "description": "Open source model Qwen/Qwen3-235B-A22B-Instruct-2507-FP8. 
145 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 145, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3_moe", "conversational", "arxiv:2505.09388", "base_model:Qwen/Qwen3-235B-A22B-Instruct-2507", "base_model:quantized:Qwen/Qwen3-235B-A22B-Instruct-2507", "endpoints_compatible", "fp8", "deploy:azure", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 164, "context_window_tokens": 4096, "parameters_total_b": 235, "parameters_active_b": 235, "is_multimodal": false }, "referral_url": "" }, { "slug": "deepseek-r1-distill-qwen-7b", "name": "Deepseek R1 Distill Qwen 7B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", "description": "Open source model deepseek-ai/DeepSeek-R1-Distill-Qwen-7B. 787 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 787, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "conversational", "arxiv:2501.12948", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 5, "context_window_tokens": 4096, "parameters_total_b": 7, "parameters_active_b": 7, "is_multimodal": false }, "referral_url": "" }, { "slug": "phi-3-mini-4k-instruct", "name": "Phi 3 Mini 4K Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct", "description": "Open source model microsoft/Phi-3-mini-4k-instruct. 
1386 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 1386, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "safetensors", "phi3", "nlp", "code", "conversational", "custom_code", "en", "fr", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-14b", "name": "Qwen3 14B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-14B", "description": "Open source model Qwen/Qwen3-14B. 366 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 366, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3", "conversational", "arxiv:2309.00071", "arxiv:2505.09388", "base_model:Qwen/Qwen3-14B-Base", "base_model:finetune:Qwen/Qwen3-14B-Base", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "16GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 10, "context_window_tokens": 4096, "parameters_total_b": 14, "parameters_active_b": 14, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen2.5-coder-1.5b", "name": "Qwen2.5 Coder 1.5B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B", "description": "Open source model Qwen/Qwen2.5-Coder-1.5B. 
81 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 81, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "code", "qwen", "qwen-coder", "codeqwen", "conversational", "en", "arxiv:2409.12186", "arxiv:2407.10671", "base_model:Qwen/Qwen2.5-1.5B", "base_model:finetune:Qwen/Qwen2.5-1.5B", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 4, "context_window_tokens": 4096, "parameters_total_b": 5, "parameters_active_b": 5, "is_multimodal": false }, "referral_url": "" }, { "slug": "llama-3.1-70b-instruct", "name": "Llama 3.1 70B Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct", "description": "Open source model meta-llama/Llama-3.1-70B-Instruct. 890 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 890, "language": "Python", "license": "llama3.1", "tags": [ "AI", "LLM", "transformers", "safetensors", "llama", "facebook", "meta", "pytorch", "llama-3", "conversational", "en", "de", "fr", "it", "pt", "hi", "es", "th", "arxiv:2204.05149", "base_model:meta-llama/Llama-3.1-70B", "base_model:finetune:meta-llama/Llama-3.1-70B", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 49, "context_window_tokens": 4096, "parameters_total_b": 70, "parameters_active_b": 70, "is_multimodal": false }, "referral_url": "" }, { "slug": "hunyuanimage-3.0", "name": "Hunyuanimage 3.0", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/tencent/HunyuanImage-3.0", "description": "Open source model tencent/HunyuanImage-3.0. 
640 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 640, "language": "Python", "license": "other", "tags": [ "AI", "LLM", "transformers", "safetensors", "hunyuan_image_3_moe", "text-to-image", "custom_code", "arxiv:2509.23951", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen2.5-coder-7b-instruct-awq", "name": "Qwen2.5 Coder 7B Instruct Awq", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct-AWQ", "description": "Open source model Qwen/Qwen2.5-Coder-7B-Instruct-AWQ. 19 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 19, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "code", "codeqwen", "chat", "qwen", "qwen-coder", "conversational", "en", "arxiv:2409.12186", "arxiv:2309.00071", "arxiv:2407.10671", "base_model:Qwen/Qwen2.5-Coder-7B-Instruct", "base_model:quantized:Qwen/Qwen2.5-Coder-7B-Instruct", "text-generation-inference", "endpoints_compatible", "4-bit", "awq", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 5, "context_window_tokens": 4096, "parameters_total_b": 7, "parameters_active_b": 7, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-coder-30b-a3b-instruct", "name": "Qwen3 Coder 30B A3B Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-Coder-30B-A3B-Instruct", "description": "Open source model Qwen/Qwen3-Coder-30B-A3B-Instruct. 
945 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 945, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3_moe", "conversational", "arxiv:2505.09388", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 21, "context_window_tokens": 4096, "parameters_total_b": 30, "parameters_active_b": 30, "is_multimodal": false }, "referral_url": "" }, { "slug": "deepseek-r1-0528", "name": "Deepseek R1 0528", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", "description": "Open source model deepseek-ai/DeepSeek-R1-0528. 2400 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 2400, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "safetensors", "deepseek_v3", "conversational", "custom_code", "arxiv:2501.12948", "eval-results", "text-generation-inference", "endpoints_compatible", "fp8", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "tiny-random-llama-3", "name": "Tiny Random Llama 3", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/llamafactory/tiny-random-Llama-3", "description": "Open source model llamafactory/tiny-random-Llama-3. 
3 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 3, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "llama", "conversational", "text-generation-inference", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen2.5-coder-32b-instruct-awq", "name": "Qwen2.5 Coder 32B Instruct Awq", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct-AWQ", "description": "Open source model Qwen/Qwen2.5-Coder-32B-Instruct-AWQ. 33 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 33, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "code", "codeqwen", "chat", "qwen", "qwen-coder", "conversational", "en", "arxiv:2409.12186", "arxiv:2309.00071", "arxiv:2407.10671", "base_model:Qwen/Qwen2.5-Coder-32B-Instruct", "base_model:quantized:Qwen/Qwen2.5-Coder-32B-Instruct", "text-generation-inference", "endpoints_compatible", "4-bit", "awq", "deploy:azure", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 22, "context_window_tokens": 4096, "parameters_total_b": 32, "parameters_active_b": 32, "is_multimodal": false }, "referral_url": "" }, { "slug": "mistral-7b-instruct-v0.1", "name": "Mistral 7B Instruct V0.1", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1", "description": "Open source model mistralai/Mistral-7B-Instruct-v0.1. 
1826 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 1826, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "pytorch", "safetensors", "mistral", "finetuned", "mistral-common", "conversational", "arxiv:2310.06825", "base_model:mistralai/Mistral-7B-v0.1", "base_model:finetune:mistralai/Mistral-7B-v0.1", "text-generation-inference", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 5, "context_window_tokens": 4096, "parameters_total_b": 7, "parameters_active_b": 7, "is_multimodal": false }, "referral_url": "" }, { "slug": "gpt-oss-20b-mxfp4-q8", "name": "Gpt Oss 20B Mxfp4 Q8", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/mlx-community/gpt-oss-20b-MXFP4-Q8", "description": "Open source model mlx-community/gpt-oss-20b-MXFP4-Q8. 31 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 31, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "mlx", "safetensors", "gpt_oss", "vllm", "conversational", "base_model:openai/gpt-oss-20b", "base_model:quantized:openai/gpt-oss-20b", "4-bit", "region:us" ], "hardware_req": "16GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 14, "context_window_tokens": 4096, "parameters_total_b": 20, "parameters_active_b": 20, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-embedding-4b", "name": "Qwen3 Embedding 4B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-Embedding-4B", "description": "Open source model Qwen/Qwen3-Embedding-4B. 
224 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 224, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "sentence-transformers", "safetensors", "qwen3", "transformers", "sentence-similarity", "feature-extraction", "text-embeddings-inference", "arxiv:2506.05176", "base_model:Qwen/Qwen3-4B-Base", "base_model:finetune:Qwen/Qwen3-4B-Base", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 3, "context_window_tokens": 4096, "parameters_total_b": 4, "parameters_active_b": 4, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen2.5-1.5b-instruct-awq", "name": "Qwen2.5 1.5B Instruct Awq", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-AWQ", "description": "Open source model Qwen/Qwen2.5-1.5B-Instruct-AWQ. 6 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 6, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "chat", "conversational", "en", "arxiv:2407.10671", "base_model:Qwen/Qwen2.5-1.5B-Instruct", "base_model:quantized:Qwen/Qwen2.5-1.5B-Instruct", "text-generation-inference", "endpoints_compatible", "4-bit", "awq", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 4, "context_window_tokens": 4096, "parameters_total_b": 5, "parameters_active_b": 5, "is_multimodal": false }, "referral_url": "" }, { "slug": "meta-llama-3.1-8b-instruct-fp8", "name": "Meta Llama 3.1 8B Instruct Fp8", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8", "description": "Open source model RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8. 
44 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 44, "language": "Python", "license": "llama3.1", "tags": [ "AI", "LLM", "transformers", "safetensors", "llama", "fp8", "vllm", "conversational", "en", "de", "fr", "it", "pt", "hi", "es", "th", "base_model:meta-llama/Llama-3.1-8B-Instruct", "base_model:quantized:meta-llama/Llama-3.1-8B-Instruct", "text-generation-inference", "endpoints_compatible", "compressed-tensors", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 6, "context_window_tokens": 4096, "parameters_total_b": 8, "parameters_active_b": 8, "is_multimodal": false }, "referral_url": "" }, { "slug": "phi-4", "name": "Phi 4", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/microsoft/phi-4", "description": "Open source model microsoft/phi-4. 2220 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 2220, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "safetensors", "phi3", "phi", "nlp", "math", "code", "chat", "conversational", "en", "arxiv:2412.08905", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "deepseek-r1", "name": "Deepseek R1", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/deepseek-ai/DeepSeek-R1", "description": "Open source model deepseek-ai/DeepSeek-R1. 
13011 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 13011, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "safetensors", "deepseek_v3", "conversational", "custom_code", "arxiv:2501.12948", "eval-results", "text-generation-inference", "endpoints_compatible", "fp8", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "llama-3.2-1b-instruct-fp8", "name": "Llama 3.2 1B Instruct Fp8", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/RedHatAI/Llama-3.2-1B-Instruct-FP8", "description": "Open source model RedHatAI/Llama-3.2-1B-Instruct-FP8. 3 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 3, "language": "Python", "license": "llama3.2", "tags": [ "AI", "LLM", "safetensors", "llama", "llama-3", "neuralmagic", "llmcompressor", "conversational", "en", "de", "fr", "it", "pt", "hi", "es", "th", "base_model:meta-llama/Llama-3.2-1B-Instruct", "base_model:quantized:meta-llama/Llama-3.2-1B-Instruct", "compressed-tensors", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 1, "parameters_active_b": 1, "is_multimodal": false }, "referral_url": "" }, { "slug": "llama-3.1-405b", "name": "Llama 3.1 405B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/meta-llama/Llama-3.1-405B", "description": "Open source model meta-llama/Llama-3.1-405B. 
961 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 961, "language": "Python", "license": "llama3.1", "tags": [ "AI", "LLM", "transformers", "safetensors", "llama", "facebook", "meta", "pytorch", "llama-3", "en", "de", "fr", "it", "pt", "hi", "es", "th", "arxiv:2204.05149", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 284, "context_window_tokens": 4096, "parameters_total_b": 405, "parameters_active_b": 405, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-4b-thinking-2507", "name": "Qwen3 4B Thinking 2507", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-4B-Thinking-2507", "description": "Open source model Qwen/Qwen3-4B-Thinking-2507. 548 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 548, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3", "conversational", "arxiv:2505.09388", "eval-results", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 3, "context_window_tokens": 4096, "parameters_total_b": 4, "parameters_active_b": 4, "is_multimodal": false }, "referral_url": "" }, { "slug": "gpt2-medium", "name": "Gpt2 Medium", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/openai-community/gpt2-medium", "description": "Open source model openai-community/gpt2-medium. 
193 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 193, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "pytorch", "tf", "jax", "rust", "onnx", "safetensors", "gpt2", "en", "arxiv:1910.09700", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "tiny-gpt2", "name": "Tiny Gpt2", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/sshleifer/tiny-gpt2", "description": "Open source model sshleifer/tiny-gpt2. 34 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 34, "language": "Python", "license": "unknown", "tags": [ "AI", "LLM", "transformers", "pytorch", "tf", "jax", "gpt2", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "hermes-3-llama-3.1-8b", "name": "Hermes 3 Llama 3.1 8B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B", "description": "Open source model NousResearch/Hermes-3-Llama-3.1-8B. 
385 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 385, "language": "Python", "license": "llama3", "tags": [ "AI", "LLM", "transformers", "safetensors", "llama", "Llama-3", "instruct", "finetune", "chatml", "gpt4", "synthetic data", "distillation", "function calling", "json mode", "axolotl", "roleplaying", "chat", "conversational", "en", "arxiv:2408.11857", "base_model:meta-llama/Llama-3.1-8B", "base_model:finetune:meta-llama/Llama-3.1-8B", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 6, "context_window_tokens": 4096, "parameters_total_b": 8, "parameters_active_b": 8, "is_multimodal": false }, "referral_url": "" }, { "slug": "phi-3.5-vision-instruct", "name": "Phi 3.5 Vision Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/microsoft/Phi-3.5-vision-instruct", "description": "Open source model microsoft/Phi-3.5-vision-instruct. 726 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 726, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "safetensors", "phi3_v", "nlp", "code", "vision", "image-text-to-text", "conversational", "custom_code", "multilingual", "arxiv:2404.14219", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": true }, "referral_url": "" }, { "slug": "minimax-m2", "name": "Minimax M2", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/MiniMaxAI/MiniMax-M2", "description": "Open source model MiniMaxAI/MiniMax-M2. 
1485 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 1485, "language": "Python", "license": "other", "tags": [ "AI", "LLM", "transformers", "safetensors", "minimax_m2", "conversational", "custom_code", "arxiv:2504.07164", "arxiv:2509.06501", "arxiv:2509.13160", "eval-results", "endpoints_compatible", "fp8", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "deepseek-r1-distill-llama-8b", "name": "Deepseek R1 Distill Llama 8B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "description": "Open source model deepseek-ai/DeepSeek-R1-Distill-Llama-8B. 843 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 843, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "safetensors", "llama", "conversational", "arxiv:2501.12948", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 6, "context_window_tokens": 4096, "parameters_total_b": 8, "parameters_active_b": 8, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-14b-awq", "name": "Qwen3 14B Awq", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-14B-AWQ", "description": "Open source model Qwen/Qwen3-14B-AWQ. 
57 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 57, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3", "conversational", "arxiv:2309.00071", "arxiv:2505.09388", "base_model:Qwen/Qwen3-14B", "base_model:quantized:Qwen/Qwen3-14B", "text-generation-inference", "endpoints_compatible", "4-bit", "awq", "region:us" ], "hardware_req": "16GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 10, "context_window_tokens": 4096, "parameters_total_b": 14, "parameters_active_b": 14, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-235b-a22b", "name": "Qwen3 235B A22B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-235B-A22B", "description": "Open source model Qwen/Qwen3-235B-A22B. 1075 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 1075, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3_moe", "conversational", "arxiv:2309.00071", "arxiv:2505.09388", "endpoints_compatible", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 164, "context_window_tokens": 4096, "parameters_total_b": 235, "parameters_active_b": 235, "is_multimodal": false }, "referral_url": "" }, { "slug": "meta-llama-3.1-8b-instruct-awq-int4", "name": "Meta Llama 3.1 8B Instruct Awq Int4", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4", "description": "Open source model hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4. 
87 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 87, "language": "Python", "license": "llama3.1", "tags": [ "AI", "LLM", "transformers", "safetensors", "llama", "llama-3.1", "meta", "autoawq", "conversational", "en", "de", "fr", "it", "pt", "hi", "es", "th", "text-generation-inference", "endpoints_compatible", "4-bit", "awq", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 6, "context_window_tokens": 4096, "parameters_total_b": 8, "parameters_active_b": 8, "is_multimodal": false }, "referral_url": "" }, { "slug": "lfm2.5-1.2b-instruct-mlx-8bit", "name": "Lfm2.5 1.2B Instruct Mlx 8Bit", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/lmstudio-community/LFM2.5-1.2B-Instruct-MLX-8bit", "description": "Open source model lmstudio-community/LFM2.5-1.2B-Instruct-MLX-8bit. 1 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 1, "language": "Python", "license": "other", "tags": [ "AI", "LLM", "transformers", "safetensors", "lfm2", "liquid", "lfm2.5", "edge", "mlx", "conversational", "en", "ar", "zh", "fr", "de", "ja", "ko", "es", "base_model:LiquidAI/LFM2.5-1.2B-Instruct", "base_model:quantized:LiquidAI/LFM2.5-1.2B-Instruct", "endpoints_compatible", "8-bit", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 2, "parameters_active_b": 2, "is_multimodal": false }, "referral_url": "" }, { "slug": "glm-4.7-flash-gguf", "name": "Glm 4.7 Flash Gguf", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/unsloth/GLM-4.7-Flash-GGUF", "description": "Open source model unsloth/GLM-4.7-Flash-GGUF. 
482 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 482, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "gguf", "unsloth", "en", "zh", "arxiv:2508.06471", "base_model:zai-org/GLM-4.7-Flash", "base_model:quantized:zai-org/GLM-4.7-Flash", "endpoints_compatible", "deploy:azure", "region:us", "imatrix", "conversational" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "deepseek-r1-distill-qwen-14b", "name": "Deepseek R1 Distill Qwen 14B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", "description": "Open source model deepseek-ai/DeepSeek-R1-Distill-Qwen-14B. 603 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 603, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "conversational", "arxiv:2501.12948", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "16GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 10, "context_window_tokens": 4096, "parameters_total_b": 14, "parameters_active_b": 14, "is_multimodal": false }, "referral_url": "" }, { "slug": "lfm2.5-1.2b-instruct-mlx-6bit", "name": "Lfm2.5 1.2B Instruct Mlx 6Bit", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/lmstudio-community/LFM2.5-1.2B-Instruct-MLX-6bit", "description": "Open source model lmstudio-community/LFM2.5-1.2B-Instruct-MLX-6bit. 
4 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 4, "language": "Python", "license": "other", "tags": [ "AI", "LLM", "transformers", "safetensors", "lfm2", "liquid", "lfm2.5", "edge", "mlx", "conversational", "en", "ar", "zh", "fr", "de", "ja", "ko", "es", "base_model:LiquidAI/LFM2.5-1.2B-Instruct", "base_model:quantized:LiquidAI/LFM2.5-1.2B-Instruct", "endpoints_compatible", "6-bit", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 2, "parameters_active_b": 2, "is_multimodal": false }, "referral_url": "" }, { "slug": "lfm2.5-1.2b-instruct-mlx-4bit", "name": "Lfm2.5 1.2B Instruct Mlx 4Bit", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/lmstudio-community/LFM2.5-1.2B-Instruct-MLX-4bit", "description": "Open source model lmstudio-community/LFM2.5-1.2B-Instruct-MLX-4bit. 1 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 1, "language": "Python", "license": "other", "tags": [ "AI", "LLM", "transformers", "safetensors", "lfm2", "liquid", "lfm2.5", "edge", "mlx", "conversational", "en", "ar", "zh", "fr", "de", "ja", "ko", "es", "base_model:LiquidAI/LFM2.5-1.2B-Instruct", "base_model:quantized:LiquidAI/LFM2.5-1.2B-Instruct", "endpoints_compatible", "4-bit", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 2, "parameters_active_b": 2, "is_multimodal": false }, "referral_url": "" }, { "slug": "vicuna-7b-v1.5", "name": "Vicuna 7B V1.5", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/lmsys/vicuna-7b-v1.5", "description": "Open source model lmsys/vicuna-7b-v1.5. 
387 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 387, "language": "Python", "license": "llama2", "tags": [ "AI", "LLM", "transformers", "pytorch", "llama", "arxiv:2307.09288", "arxiv:2306.05685", "text-generation-inference", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 5, "context_window_tokens": 4096, "parameters_total_b": 7, "parameters_active_b": 7, "is_multimodal": false }, "referral_url": "" }, { "slug": "llama-3.2-1b-instruct-q8_0-gguf", "name": "Llama 3.2 1B Instruct Q8_0 Gguf", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/hugging-quants/Llama-3.2-1B-Instruct-Q8_0-GGUF", "description": "Open source model hugging-quants/Llama-3.2-1B-Instruct-Q8_0-GGUF. 43 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 43, "language": "Python", "license": "unknown", "tags": [ "AI", "LLM", "gguf", "facebook", "meta", "pytorch", "llama", "llama-3", "llama-cpp", "gguf-my-repo", "en", "de", "fr", "it", "pt", "hi", "es", "th", "base_model:meta-llama/Llama-3.2-1B-Instruct", "base_model:quantized:meta-llama/Llama-3.2-1B-Instruct", "endpoints_compatible", "region:us", "conversational" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 1, "parameters_active_b": 1, "is_multimodal": false }, "referral_url": "" }, { "slug": "llama-3.3-70b-instruct-awq", "name": "Llama 3.3 70B Instruct Awq", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/kosbu/Llama-3.3-70B-Instruct-AWQ", "description": "Open source model kosbu/Llama-3.3-70B-Instruct-AWQ. 
10 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 10, "language": "Python", "license": "llama3.3", "tags": [ "AI", "LLM", "transformers", "safetensors", "llama", "facebook", "meta", "llama-3", "awq", "conversational", "en", "fr", "it", "pt", "hi", "es", "th", "de", "base_model:meta-llama/Llama-3.3-70B-Instruct", "base_model:quantized:meta-llama/Llama-3.3-70B-Instruct", "text-generation-inference", "endpoints_compatible", "4-bit", "deploy:azure", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 49, "context_window_tokens": 4096, "parameters_total_b": 70, "parameters_active_b": 70, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-32b-fp8", "name": "Qwen3 32B Fp8", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-32B-FP8", "description": "Open source model Qwen/Qwen3-32B-FP8. 80 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 80, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3", "conversational", "arxiv:2309.00071", "arxiv:2505.09388", "base_model:Qwen/Qwen3-32B", "base_model:quantized:Qwen/Qwen3-32B", "text-generation-inference", "endpoints_compatible", "fp8", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 22, "context_window_tokens": 4096, "parameters_total_b": 32, "parameters_active_b": 32, "is_multimodal": false }, "referral_url": "" }, { "slug": "gpt2-xl", "name": "Gpt2 Xl", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/openai-community/gpt2-xl", "description": "Open source model openai-community/gpt2-xl. 
373 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 373, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "pytorch", "tf", "jax", "rust", "safetensors", "gpt2", "en", "arxiv:1910.09700", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-4b-instruct-2507-fp8", "name": "Qwen3 4B Instruct 2507 Fp8", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507-FP8", "description": "Open source model Qwen/Qwen3-4B-Instruct-2507-FP8. 65 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 65, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3", "conversational", "arxiv:2505.09388", "base_model:Qwen/Qwen3-4B-Instruct-2507", "base_model:quantized:Qwen/Qwen3-4B-Instruct-2507", "text-generation-inference", "endpoints_compatible", "fp8", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 3, "context_window_tokens": 4096, "parameters_total_b": 4, "parameters_active_b": 4, "is_multimodal": false }, "referral_url": "" }, { "slug": "xlnet-base-cased", "name": "Xlnet Base Cased", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/xlnet/xlnet-base-cased", "description": "Open source model xlnet/xlnet-base-cased. 
80 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 80, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "pytorch", "tf", "rust", "xlnet", "en", "dataset:bookcorpus", "dataset:wikipedia", "arxiv:1906.08237", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "llama-2-7b-hf", "name": "Llama 2 7B Hf", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/meta-llama/Llama-2-7b-hf", "description": "Open source model meta-llama/Llama-2-7b-hf. 2268 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 2268, "language": "Python", "license": "llama2", "tags": [ "AI", "LLM", "transformers", "pytorch", "safetensors", "llama", "facebook", "meta", "llama-2", "en", "arxiv:2307.09288", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 5, "context_window_tokens": 4096, "parameters_total_b": 7, "parameters_active_b": 7, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen2.5-math-7b-instruct", "name": "Qwen2.5 Math 7B Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen2.5-Math-7B-Instruct", "description": "Open source model Qwen/Qwen2.5-Math-7B-Instruct. 
89 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 89, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "chat", "conversational", "en", "arxiv:2409.12122", "base_model:Qwen/Qwen2.5-Math-7B", "base_model:finetune:Qwen/Qwen2.5-Math-7B", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 5, "context_window_tokens": 4096, "parameters_total_b": 7, "parameters_active_b": 7, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-reranker-0.6b", "name": "Qwen3 Reranker 0.6B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-Reranker-0.6B", "description": "Open source model Qwen/Qwen3-Reranker-0.6B. 305 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 305, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3", "text-ranking", "arxiv:2506.05176", "base_model:Qwen/Qwen3-0.6B-Base", "base_model:finetune:Qwen/Qwen3-0.6B-Base", "text-embeddings-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 4, "context_window_tokens": 4096, "parameters_total_b": 6, "parameters_active_b": 6, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen2.5-1.5b", "name": "Qwen2.5 1.5B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen2.5-1.5B", "description": "Open source model Qwen/Qwen2.5-1.5B. 
165 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 165, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "conversational", "en", "arxiv:2407.10671", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 4, "context_window_tokens": 4096, "parameters_total_b": 5, "parameters_active_b": 5, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-30b-a3b-thinking-2507", "name": "Qwen3 30B A3B Thinking 2507", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-30B-A3B-Thinking-2507", "description": "Open source model Qwen/Qwen3-30B-A3B-Thinking-2507. 359 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 359, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3_moe", "conversational", "arxiv:2402.17463", "arxiv:2407.02490", "arxiv:2501.15383", "arxiv:2404.06654", "arxiv:2505.09388", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 21, "context_window_tokens": 4096, "parameters_total_b": 30, "parameters_active_b": 30, "is_multimodal": false }, "referral_url": "" }, { "slug": "smollm2-135m-instruct", "name": "Smollm2 135M Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/HuggingFaceTB/SmolLM2-135M-Instruct", "description": "Open source model HuggingFaceTB/SmolLM2-135M-Instruct. 
292 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 292, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "tensorboard", "onnx", "safetensors", "llama", "transformers.js", "conversational", "en", "arxiv:2502.02737", "base_model:HuggingFaceTB/SmolLM2-135M", "base_model:quantized:HuggingFaceTB/SmolLM2-135M", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen2.5-math-1.5b", "name": "Qwen2.5 Math 1.5B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen2.5-Math-1.5B", "description": "Open source model Qwen/Qwen2.5-Math-1.5B. 100 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 100, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "conversational", "en", "arxiv:2409.12122", "base_model:Qwen/Qwen2.5-1.5B", "base_model:finetune:Qwen/Qwen2.5-1.5B", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 4, "context_window_tokens": 4096, "parameters_total_b": 5, "parameters_active_b": 5, "is_multimodal": false }, "referral_url": "" }, { "slug": "glm-4.5-air-awq-4bit", "name": "Glm 4.5 Air Awq 4Bit", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/cyankiwi/GLM-4.5-Air-AWQ-4bit", "description": "Open source model cyankiwi/GLM-4.5-Air-AWQ-4bit. 
27 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 27, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "safetensors", "glm4_moe", "conversational", "en", "zh", "arxiv:2508.06471", "base_model:zai-org/GLM-4.5-Air", "base_model:quantized:zai-org/GLM-4.5-Air", "endpoints_compatible", "compressed-tensors", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 3, "context_window_tokens": 4096, "parameters_total_b": 4, "parameters_active_b": 4, "is_multimodal": false }, "referral_url": "" }, { "slug": "llama-2-7b-chat-hf", "name": "Llama 2 7B Chat Hf", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/meta-llama/Llama-2-7b-chat-hf", "description": "Open source model meta-llama/Llama-2-7b-chat-hf. 4705 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 4705, "language": "Python", "license": "llama2", "tags": [ "AI", "LLM", "transformers", "pytorch", "safetensors", "llama", "facebook", "meta", "llama-2", "conversational", "en", "arxiv:2307.09288", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 5, "context_window_tokens": 4096, "parameters_total_b": 7, "parameters_active_b": 7, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen2.5-coder-7b-instruct-gptq-int4", "name": "Qwen2.5 Coder 7B Instruct Gptq Int4", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-Int4", "description": "Open source model Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-Int4. 
12 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 12, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "code", "codeqwen", "chat", "qwen", "qwen-coder", "conversational", "en", "arxiv:2409.12186", "arxiv:2309.00071", "arxiv:2407.10671", "base_model:Qwen/Qwen2.5-Coder-7B-Instruct", "base_model:quantized:Qwen/Qwen2.5-Coder-7B-Instruct", "text-generation-inference", "endpoints_compatible", "4-bit", "gptq", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 5, "context_window_tokens": 4096, "parameters_total_b": 7, "parameters_active_b": 7, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-vl-30b-a3b-instruct-awq", "name": "Qwen3 Vl 30B A3B Instruct Awq", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/QuantTrio/Qwen3-VL-30B-A3B-Instruct-AWQ", "description": "Open source model QuantTrio/Qwen3-VL-30B-A3B-Instruct-AWQ. 
38 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 38, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3_vl_moe", "image-text-to-text", "AWQ", "vLLM", "conversational", "arxiv:2505.09388", "arxiv:2502.13923", "arxiv:2409.12191", "arxiv:2308.12966", "base_model:Qwen/Qwen3-VL-30B-A3B-Instruct", "base_model:quantized:Qwen/Qwen3-VL-30B-A3B-Instruct", "endpoints_compatible", "4-bit", "awq", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 21, "context_window_tokens": 4096, "parameters_total_b": 30, "parameters_active_b": 30, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-8b-base", "name": "Qwen3 8B Base", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-8B-Base", "description": "Open source model Qwen/Qwen3-8B-Base. 82 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 82, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3", "conversational", "arxiv:2505.09388", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 6, "context_window_tokens": 4096, "parameters_total_b": 8, "parameters_active_b": 8, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen2.5-coder-14b-instruct", "name": "Qwen2.5 Coder 14B Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct", "description": "Open source model Qwen/Qwen2.5-Coder-14B-Instruct. 
140 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 140, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "code", "codeqwen", "chat", "qwen", "qwen-coder", "conversational", "en", "arxiv:2409.12186", "arxiv:2309.00071", "arxiv:2407.10671", "base_model:Qwen/Qwen2.5-Coder-14B", "base_model:finetune:Qwen/Qwen2.5-Coder-14B", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "16GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 10, "context_window_tokens": 4096, "parameters_total_b": 14, "parameters_active_b": 14, "is_multimodal": false }, "referral_url": "" }, { "slug": "stories15m_moe", "name": "Stories15M_Moe", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/ggml-org/stories15M_MOE", "description": "Open source model ggml-org/stories15M_MOE. 5 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 5, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "safetensors", "gguf", "mixtral", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "opt-1.3b", "name": "Opt 1.3B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/facebook/opt-1.3b", "description": "Open source model facebook/opt-1.3b. 
182 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 182, "language": "Python", "license": "other", "tags": [ "AI", "LLM", "transformers", "pytorch", "tf", "jax", "opt", "en", "arxiv:2205.01068", "arxiv:2005.14165", "text-generation-inference", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 2, "context_window_tokens": 4096, "parameters_total_b": 3, "parameters_active_b": 3, "is_multimodal": false }, "referral_url": "" }, { "slug": "minimax-m2-awq", "name": "Minimax M2 Awq", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/QuantTrio/MiniMax-M2-AWQ", "description": "Open source model QuantTrio/MiniMax-M2-AWQ. 8 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 8, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "mixtral", "vLLM", "AWQ", "conversational", "arxiv:2504.07164", "arxiv:2509.06501", "arxiv:2509.13160", "base_model:MiniMaxAI/MiniMax-M2", "base_model:quantized:MiniMaxAI/MiniMax-M2", "text-generation-inference", "endpoints_compatible", "4-bit", "awq", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "glm-4.7-flash-nvfp4", "name": "Glm 4.7 Flash Nvfp4", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/GadflyII/GLM-4.7-Flash-NVFP4", "description": "Open source model GadflyII/GLM-4.7-Flash-NVFP4. 
62 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 62, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "glm4_moe_lite", "moe", "nvfp4", "quantized", "vllm", "glm", "30b", "conversational", "en", "zh", "base_model:zai-org/GLM-4.7-Flash", "base_model:quantized:zai-org/GLM-4.7-Flash", "endpoints_compatible", "compressed-tensors", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "hy-mt1.5-7b", "name": "Hy Mt1.5 7B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/tencent/HY-MT1.5-7B", "description": "Open source model tencent/HY-MT1.5-7B. 133 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 133, "language": "Python", "license": "unknown", "tags": [ "AI", "LLM", "transformers", "safetensors", "hunyuan_v1_dense", "translation", "zh", "en", "fr", "pt", "es", "ja", "tr", "ru", "ar", "ko", "th", "it", "de", "vi", "ms", "id", "tl", "hi", "pl", "cs", "nl", "km", "my", "fa", "gu", "ur", "te", "mr", "he", "bn", "ta", "uk", "bo", "kk", "mn", "ug", "arxiv:2512.24092", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 5, "context_window_tokens": 4096, "parameters_total_b": 7, "parameters_active_b": 7, "is_multimodal": false }, "referral_url": "" }, { "slug": "gemma-2-27b-it", "name": "Gemma 2 27B It", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/google/gemma-2-27b-it", "description": "Open source model google/gemma-2-27b-it. 
559 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 559, "language": "Python", "license": "gemma", "tags": [ "AI", "LLM", "transformers", "safetensors", "gemma2", "conversational", "arxiv:2009.03300", "arxiv:1905.07830", "arxiv:1911.11641", "arxiv:1904.09728", "arxiv:1905.10044", "arxiv:1907.10641", "arxiv:1811.00937", "arxiv:1809.02789", "arxiv:1911.01547", "arxiv:1705.03551", "arxiv:2107.03374", "arxiv:2108.07732", "arxiv:2110.14168", "arxiv:2009.11462", "arxiv:2101.11718", "arxiv:2110.08193", "arxiv:1804.09301", "arxiv:2109.07958", "arxiv:1804.06876", "arxiv:2103.03874", "arxiv:2304.06364", "arxiv:2206.04615", "arxiv:2203.09509", "base_model:google/gemma-2-27b", "base_model:finetune:google/gemma-2-27b", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 19, "context_window_tokens": 4096, "parameters_total_b": 27, "parameters_active_b": 27, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-coder-next-gguf", "name": "Qwen3 Coder Next Gguf", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/unsloth/Qwen3-Coder-Next-GGUF", "description": "Open source model unsloth/Qwen3-Coder-Next-GGUF. 
347 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 347, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "gguf", "qwen3_next", "unsloth", "qwen", "qwen3", "base_model:Qwen/Qwen3-Coder-Next", "base_model:quantized:Qwen/Qwen3-Coder-Next", "endpoints_compatible", "region:us", "imatrix", "conversational" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "gte-qwen2-1.5b-instruct", "name": "Gte Qwen2 1.5B Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct", "description": "Open source model Alibaba-NLP/gte-Qwen2-1.5B-instruct. 229 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 229, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "sentence-transformers", "safetensors", "qwen2", "mteb", "transformers", "Qwen2", "sentence-similarity", "custom_code", "arxiv:2308.03281", "model-index", "text-embeddings-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 4, "context_window_tokens": 4096, "parameters_total_b": 5, "parameters_active_b": 5, "is_multimodal": false }, "referral_url": "" }, { "slug": "lfm2-1.2b", "name": "Lfm2 1.2B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/LiquidAI/LFM2-1.2B", "description": "Open source model LiquidAI/LFM2-1.2B. 
349 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 349, "language": "Python", "license": "other", "tags": [ "AI", "LLM", "transformers", "safetensors", "lfm2", "liquid", "edge", "conversational", "en", "ar", "zh", "fr", "de", "ja", "ko", "es", "arxiv:2511.23404", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 2, "parameters_active_b": 2, "is_multimodal": false }, "referral_url": "" }, { "slug": "saiga_llama3_8b", "name": "Saiga_Llama3_8B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/IlyaGusev/saiga_llama3_8b", "description": "Open source model IlyaGusev/saiga_llama3_8b. 137 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 137, "language": "Python", "license": "other", "tags": [ "AI", "LLM", "transformers", "safetensors", "llama", "conversational", "ru", "dataset:IlyaGusev/saiga_scored", "doi:10.57967/hf/2368", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 6, "context_window_tokens": 4096, "parameters_total_b": 8, "parameters_active_b": 8, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-1.7b-base", "name": "Qwen3 1.7B Base", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-1.7B-Base", "description": "Open source model Qwen/Qwen3-1.7B-Base. 
62 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 62, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3", "conversational", "arxiv:2505.09388", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 5, "context_window_tokens": 4096, "parameters_total_b": 7, "parameters_active_b": 7, "is_multimodal": false }, "referral_url": "" }, { "slug": "mistral-7b-v0.3-bnb-4bit", "name": "Mistral 7B V0.3 Bnb 4Bit", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/unsloth/mistral-7b-v0.3-bnb-4bit", "description": "Open source model unsloth/mistral-7b-v0.3-bnb-4bit. 22 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 22, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "mistral", "unsloth", "mistral-7b", "en", "base_model:mistralai/Mistral-7B-v0.3", "base_model:quantized:mistralai/Mistral-7B-v0.3", "text-generation-inference", "endpoints_compatible", "4-bit", "bitsandbytes", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 5, "context_window_tokens": 4096, "parameters_total_b": 7, "parameters_active_b": 7, "is_multimodal": false }, "referral_url": "" }, { "slug": "gemma-2-2b-it", "name": "Gemma 2 2B It", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/google/gemma-2-2b-it", "description": "Open source model google/gemma-2-2b-it. 
1285 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 1285, "language": "Python", "license": "gemma", "tags": [ "AI", "LLM", "transformers", "safetensors", "gemma2", "conversational", "arxiv:2009.03300", "arxiv:1905.07830", "arxiv:1911.11641", "arxiv:1904.09728", "arxiv:1905.10044", "arxiv:1907.10641", "arxiv:1811.00937", "arxiv:1809.02789", "arxiv:1911.01547", "arxiv:1705.03551", "arxiv:2107.03374", "arxiv:2108.07732", "arxiv:2110.14168", "arxiv:2009.11462", "arxiv:2101.11718", "arxiv:2110.08193", "arxiv:1804.09301", "arxiv:2109.07958", "arxiv:1804.06876", "arxiv:2103.03874", "arxiv:2304.06364", "arxiv:1903.00161", "arxiv:2206.04615", "arxiv:2203.09509", "arxiv:2403.13793", "base_model:google/gemma-2-2b", "base_model:finetune:google/gemma-2-2b", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 2, "parameters_active_b": 2, "is_multimodal": false }, "referral_url": "" }, { "slug": "phi-4-multimodal-instruct", "name": "Phi 4 Multimodal Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/microsoft/Phi-4-multimodal-instruct", "description": "Open source model microsoft/Phi-4-multimodal-instruct. 
1573 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 1573, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "safetensors", "phi4mm", "nlp", "code", "audio", "automatic-speech-recognition", "speech-summarization", "speech-translation", "visual-question-answering", "phi-4-multimodal", "phi", "phi-4-mini", "custom_code", "multilingual", "ar", "zh", "cs", "da", "nl", "en", "fi", "fr", "de", "he", "hu", "it", "ja", "ko", "no", "pl", "pt", "ru", "es", "sv", "th", "tr", "uk", "arxiv:2503.01743", "arxiv:2407.13833", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": true }, "referral_url": "" }, { "slug": "pythia-70m-deduped", "name": "Pythia 70M Deduped", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/EleutherAI/pythia-70m-deduped", "description": "Open source model EleutherAI/pythia-70m-deduped. 
27 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 27, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "pytorch", "safetensors", "gpt_neox", "causal-lm", "pythia", "en", "dataset:EleutherAI/the_pile_deduplicated", "arxiv:2304.01373", "arxiv:2101.00027", "arxiv:2201.07311", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "dialogpt-medium", "name": "Dialogpt Medium", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/microsoft/DialoGPT-medium", "description": "Open source model microsoft/DialoGPT-medium. 433 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 433, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "pytorch", "tf", "jax", "rust", "gpt2", "conversational", "arxiv:1911.00536", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "gpt-oss-20b-bf16", "name": "Gpt Oss 20B Bf16", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/unsloth/gpt-oss-20b-BF16", "description": "Open source model unsloth/gpt-oss-20b-BF16. 
29 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 29, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "gpt_oss", "vllm", "unsloth", "conversational", "base_model:openai/gpt-oss-20b", "base_model:finetune:openai/gpt-oss-20b", "endpoints_compatible", "region:us" ], "hardware_req": "16GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 14, "context_window_tokens": 4096, "parameters_total_b": 20, "parameters_active_b": 20, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen2.5-72b-instruct", "name": "Qwen2.5 72B Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct", "description": "Open source model Qwen/Qwen2.5-72B-Instruct. 910 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 910, "language": "Python", "license": "other", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "chat", "conversational", "en", "arxiv:2309.00071", "arxiv:2407.10671", "base_model:Qwen/Qwen2.5-72B", "base_model:finetune:Qwen/Qwen2.5-72B", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 50, "context_window_tokens": 4096, "parameters_total_b": 72, "parameters_active_b": 72, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-32b-awq", "name": "Qwen3 32B Awq", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-32B-AWQ", "description": "Open source model Qwen/Qwen3-32B-AWQ. 
125 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 125, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3", "conversational", "arxiv:2309.00071", "arxiv:2505.09388", "base_model:Qwen/Qwen3-32B", "base_model:quantized:Qwen/Qwen3-32B", "text-generation-inference", "endpoints_compatible", "4-bit", "awq", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 22, "context_window_tokens": 4096, "parameters_total_b": 32, "parameters_active_b": 32, "is_multimodal": false }, "referral_url": "" }, { "slug": "mimo-v2-flash", "name": "Mimo V2 Flash", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/XiaomiMiMo/MiMo-V2-Flash", "description": "Open source model XiaomiMiMo/MiMo-V2-Flash. 628 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 628, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "safetensors", "mimo_v2_flash", "conversational", "custom_code", "eval-results", "fp8", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-coder-30b-a3b-instruct-fp8", "name": "Qwen3 Coder 30B A3B Instruct Fp8", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8", "description": "Open source model Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8. 
158 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 158, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3_moe", "conversational", "arxiv:2505.09388", "endpoints_compatible", "fp8", "deploy:azure", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 21, "context_window_tokens": 4096, "parameters_total_b": 30, "parameters_active_b": 30, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-8b-fp8", "name": "Qwen3 8B Fp8", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-8B-FP8", "description": "Open source model Qwen/Qwen3-8B-FP8. 56 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 56, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3", "conversational", "arxiv:2309.00071", "arxiv:2505.09388", "base_model:Qwen/Qwen3-8B", "base_model:quantized:Qwen/Qwen3-8B", "text-generation-inference", "endpoints_compatible", "fp8", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 6, "context_window_tokens": 4096, "parameters_total_b": 8, "parameters_active_b": 8, "is_multimodal": false }, "referral_url": "" }, { "slug": "deepseek-v3.2", "name": "Deepseek V3.2", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/deepseek-ai/DeepSeek-V3.2", "description": "Open source model deepseek-ai/DeepSeek-V3.2. 
1251 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 1251, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "safetensors", "deepseek_v32", "conversational", "base_model:deepseek-ai/DeepSeek-V3.2-Exp-Base", "base_model:finetune:deepseek-ai/DeepSeek-V3.2-Exp-Base", "eval-results", "endpoints_compatible", "fp8", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-coder-next", "name": "Qwen3 Coder Next", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-Coder-Next", "description": "Open source model Qwen/Qwen3-Coder-Next. 912 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 912, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3_next", "conversational", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen2-0.5b", "name": "Qwen2 0.5B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen2-0.5B", "description": "Open source model Qwen/Qwen2-0.5B. 
164 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 164, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "pretrained", "conversational", "en", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 4, "context_window_tokens": 4096, "parameters_total_b": 5, "parameters_active_b": 5, "is_multimodal": false }, "referral_url": "" }, { "slug": "mistral-7b-v0.1", "name": "Mistral 7B V0.1", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/mistralai/Mistral-7B-v0.1", "description": "Open source model mistralai/Mistral-7B-v0.1. 4042 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 4042, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "pytorch", "safetensors", "mistral", "pretrained", "mistral-common", "en", "arxiv:2310.06825", "text-generation-inference", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 5, "context_window_tokens": 4096, "parameters_total_b": 7, "parameters_active_b": 7, "is_multimodal": false }, "referral_url": "" }, { "slug": "kimi-k2-thinking", "name": "Kimi K2 Thinking", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/moonshotai/Kimi-K2-Thinking", "description": "Open source model moonshotai/Kimi-K2-Thinking. 
1670 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 1670, "language": "Python", "license": "other", "tags": [ "AI", "LLM", "transformers", "safetensors", "kimi_k2", "conversational", "custom_code", "eval-results", "endpoints_compatible", "compressed-tensors", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "deepseek-r1-0528-qwen3-8b-mlx-4bit", "name": "Deepseek R1 0528 Qwen3 8B Mlx 4Bit", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/lmstudio-community/DeepSeek-R1-0528-Qwen3-8B-MLX-4bit", "description": "Open source model lmstudio-community/DeepSeek-R1-0528-Qwen3-8B-MLX-4bit. 7 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 7, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "mlx", "safetensors", "qwen3", "conversational", "base_model:deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", "base_model:quantized:deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", "4-bit", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 6, "context_window_tokens": 4096, "parameters_total_b": 8, "parameters_active_b": 8, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen2.5-7b-instruct-awq", "name": "Qwen2.5 7B Instruct Awq", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct-AWQ", "description": "Open source model Qwen/Qwen2.5-7B-Instruct-AWQ. 
36 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 36, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "chat", "conversational", "en", "arxiv:2309.00071", "arxiv:2407.10671", "base_model:Qwen/Qwen2.5-7B-Instruct", "base_model:quantized:Qwen/Qwen2.5-7B-Instruct", "text-generation-inference", "endpoints_compatible", "4-bit", "awq", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 5, "context_window_tokens": 4096, "parameters_total_b": 7, "parameters_active_b": 7, "is_multimodal": false }, "referral_url": "" }, { "slug": "points-reader", "name": "Points Reader", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/tencent/POINTS-Reader", "description": "Open source model tencent/POINTS-Reader. 100 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 100, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "image-text-to-text", "conversational", "custom_code", "arxiv:2509.01215", "arxiv:2412.08443", "arxiv:2409.04828", "arxiv:2405.11850", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": true }, "referral_url": "" }, { "slug": "qwen3-4b-base", "name": "Qwen3 4B Base", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-4B-Base", "description": "Open source model Qwen/Qwen3-4B-Base. 
80 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 80, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3", "conversational", "arxiv:2505.09388", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 3, "context_window_tokens": 4096, "parameters_total_b": 4, "parameters_active_b": 4, "is_multimodal": false }, "referral_url": "" }, { "slug": "step-3.5-flash", "name": "Step 3.5 Flash", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/stepfun-ai/Step-3.5-Flash", "description": "Open source model stepfun-ai/Step-3.5-Flash. 621 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 621, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "step3p5", "conversational", "custom_code", "arxiv:2602.10604", "arxiv:2601.05593", "arxiv:2507.19427", "eval-results", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "kogpt2-base-v2", "name": "Kogpt2 Base V2", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/skt/kogpt2-base-v2", "description": "Open source model skt/kogpt2-base-v2. 
60 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 60, "language": "Python", "license": "cc-by-nc-sa-4.0", "tags": [ "AI", "LLM", "transformers", "pytorch", "jax", "gpt2", "ko", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "parler-tts-mini-multilingual-v1.1", "name": "Parler Tts Mini Multilingual V1.1", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/parler-tts/parler-tts-mini-multilingual-v1.1", "description": "Open source model parler-tts/parler-tts-mini-multilingual-v1.1. 54 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 54, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "parler_tts", "text-to-speech", "annotation", "en", "fr", "es", "pt", "pl", "de", "nl", "it", "dataset:facebook/multilingual_librispeech", "dataset:parler-tts/libritts_r_filtered", "dataset:parler-tts/libritts-r-filtered-speaker-descriptions", "dataset:parler-tts/mls_eng", "dataset:parler-tts/mls-eng-speaker-descriptions", "dataset:ylacombe/mls-annotated", "dataset:ylacombe/cml-tts-filtered-annotated", "dataset:PHBJT/cml-tts-filtered", "arxiv:2402.01912", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-reranker-8b", "name": "Qwen3 Reranker 8B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-Reranker-8B", "description": "Open source model 
Qwen/Qwen3-Reranker-8B. 213 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 213, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3", "text-ranking", "arxiv:2506.05176", "base_model:Qwen/Qwen3-8B-Base", "base_model:finetune:Qwen/Qwen3-8B-Base", "text-embeddings-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 6, "context_window_tokens": 4096, "parameters_total_b": 8, "parameters_active_b": 8, "is_multimodal": false }, "referral_url": "" }, { "slug": "deepseek-r1-0528-qwen3-8b-mlx-8bit", "name": "Deepseek R1 0528 Qwen3 8B Mlx 8Bit", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/lmstudio-community/DeepSeek-R1-0528-Qwen3-8B-MLX-8bit", "description": "Open source model lmstudio-community/DeepSeek-R1-0528-Qwen3-8B-MLX-8bit. 13 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 13, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "mlx", "safetensors", "qwen3", "conversational", "base_model:deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", "base_model:quantized:deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", "8-bit", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 6, "context_window_tokens": 4096, "parameters_total_b": 8, "parameters_active_b": 8, "is_multimodal": false }, "referral_url": "" }, { "slug": "powermoe-3b", "name": "Powermoe 3B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/ibm-research/PowerMoE-3b", "description": "Open source model ibm-research/PowerMoE-3b. 
14 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 14, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "granitemoe", "arxiv:2408.13359", "model-index", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 2, "context_window_tokens": 4096, "parameters_total_b": 3, "parameters_active_b": 3, "is_multimodal": false }, "referral_url": "" }, { "slug": "llada-8b-instruct", "name": "Llada 8B Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/GSAI-ML/LLaDA-8B-Instruct", "description": "Open source model GSAI-ML/LLaDA-8B-Instruct. 342 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 342, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "safetensors", "llada", "conversational", "custom_code", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 6, "context_window_tokens": 4096, "parameters_total_b": 8, "parameters_active_b": 8, "is_multimodal": false }, "referral_url": "" }, { "slug": "apertus-8b-instruct-2509", "name": "Apertus 8B Instruct 2509", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/swiss-ai/Apertus-8B-Instruct-2509", "description": "Open source model swiss-ai/Apertus-8B-Instruct-2509. 
435 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 435, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "apertus", "multilingual", "compliant", "swiss-ai", "conversational", "arxiv:2509.14233", "base_model:swiss-ai/Apertus-8B-2509", "base_model:finetune:swiss-ai/Apertus-8B-2509", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 6, "context_window_tokens": 4096, "parameters_total_b": 8, "parameters_active_b": 8, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-30b-a3b-gptq-int4", "name": "Qwen3 30B A3B Gptq Int4", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-30B-A3B-GPTQ-Int4", "description": "Open source model Qwen/Qwen3-30B-A3B-GPTQ-Int4. 45 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 45, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3_moe", "conversational", "arxiv:2309.00071", "arxiv:2505.09388", "base_model:Qwen/Qwen3-30B-A3B", "base_model:quantized:Qwen/Qwen3-30B-A3B", "endpoints_compatible", "4-bit", "gptq", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 21, "context_window_tokens": 4096, "parameters_total_b": 30, "parameters_active_b": 30, "is_multimodal": false }, "referral_url": "" }, { "slug": "tinyllama-1.1b-chat-v0.3-gptq", "name": "Tinyllama 1.1B Chat V0.3 Gptq", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GPTQ", "description": "Open source model TheBloke/TinyLlama-1.1B-Chat-v0.3-GPTQ. 
9 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 9, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "llama", "en", "dataset:cerebras/SlimPajama-627B", "dataset:bigcode/starcoderdata", "dataset:OpenAssistant/oasst_top1_2023-08-25", "base_model:TinyLlama/TinyLlama-1.1B-Chat-v0.3", "base_model:quantized:TinyLlama/TinyLlama-1.1B-Chat-v0.3", "text-generation-inference", "4-bit", "gptq", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 1, "parameters_active_b": 1, "is_multimodal": false }, "referral_url": "" }, { "slug": "prot_t5_xl_bfd", "name": "Prot_T5_Xl_Bfd", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Rostlab/prot_t5_xl_bfd", "description": "Open source model Rostlab/prot_t5_xl_bfd. 10 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 10, "language": "Python", "license": "unknown", "tags": [ "AI", "LLM", "transformers", "pytorch", "tf", "t5", "protein language model", "dataset:BFD", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-4b-instruct-2507-unsloth-bnb-4bit", "name": "Qwen3 4B Instruct 2507 Unsloth Bnb 4Bit", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/unsloth/Qwen3-4B-Instruct-2507-unsloth-bnb-4bit", "description": "Open source model unsloth/Qwen3-4B-Instruct-2507-unsloth-bnb-4bit. 
13 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 13, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3", "unsloth", "conversational", "arxiv:2505.09388", "base_model:Qwen/Qwen3-4B-Instruct-2507", "base_model:quantized:Qwen/Qwen3-4B-Instruct-2507", "text-generation-inference", "endpoints_compatible", "4-bit", "bitsandbytes", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 3, "context_window_tokens": 4096, "parameters_total_b": 4, "parameters_active_b": 4, "is_multimodal": false }, "referral_url": "" }, { "slug": "phi-3.5-mini-instruct", "name": "Phi 3.5 Mini Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/microsoft/Phi-3.5-mini-instruct", "description": "Open source model microsoft/Phi-3.5-mini-instruct. 963 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 963, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "safetensors", "phi3", "nlp", "code", "conversational", "custom_code", "multilingual", "arxiv:2404.14219", "arxiv:2407.13833", "arxiv:2403.06412", "eval-results", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "meta-llama-3.1-8b-instruct-bnb-4bit", "name": "Meta Llama 3.1 8B Instruct Bnb 4Bit", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit", "description": "Open source model unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit. 
95 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 95, "language": "Python", "license": "llama3.1", "tags": [ "AI", "LLM", "transformers", "safetensors", "llama", "llama-3", "meta", "facebook", "unsloth", "conversational", "en", "arxiv:2204.05149", "base_model:meta-llama/Llama-3.1-8B-Instruct", "base_model:quantized:meta-llama/Llama-3.1-8B-Instruct", "text-generation-inference", "endpoints_compatible", "4-bit", "bitsandbytes", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 6, "context_window_tokens": 4096, "parameters_total_b": 8, "parameters_active_b": 8, "is_multimodal": false }, "referral_url": "" }, { "slug": "glm-4.7-flash-awq-4bit", "name": "Glm 4.7 Flash Awq 4Bit", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/cyankiwi/GLM-4.7-Flash-AWQ-4bit", "description": "Open source model cyankiwi/GLM-4.7-Flash-AWQ-4bit. 43 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 43, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "safetensors", "glm4_moe_lite", "conversational", "en", "zh", "arxiv:2508.06471", "base_model:zai-org/GLM-4.7-Flash", "base_model:quantized:zai-org/GLM-4.7-Flash", "endpoints_compatible", "compressed-tensors", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 3, "context_window_tokens": 4096, "parameters_total_b": 4, "parameters_active_b": 4, "is_multimodal": false }, "referral_url": "" }, { "slug": "dots.ocr", "name": "Dots.Ocr", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/rednote-hilab/dots.ocr", "description": "Open source model rednote-hilab/dots.ocr. 
1243 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 1243, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "dots_ocr", "safetensors", "image-to-text", "ocr", "document-parse", "layout", "table", "formula", "transformers", "custom_code", "image-text-to-text", "conversational", "en", "zh", "multilingual", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": true }, "referral_url": "" }, { "slug": "mistral-7b-bnb-4bit", "name": "Mistral 7B Bnb 4Bit", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/unsloth/mistral-7b-bnb-4bit", "description": "Open source model unsloth/mistral-7b-bnb-4bit. 30 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 30, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "mistral", "unsloth", "mistral-7b", "bnb", "en", "text-generation-inference", "endpoints_compatible", "4-bit", "bitsandbytes", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 5, "context_window_tokens": 4096, "parameters_total_b": 7, "parameters_active_b": 7, "is_multimodal": false }, "referral_url": "" }, { "slug": "glm-5-fp8", "name": "Glm 5 Fp8", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/zai-org/GLM-5-FP8", "description": "Open source model zai-org/GLM-5-FP8. 
108 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 108, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "safetensors", "glm_moe_dsa", "conversational", "en", "zh", "endpoints_compatible", "fp8", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen-7b", "name": "Qwen 7B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen-7B", "description": "Open source model Qwen/Qwen-7B. 395 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 395, "language": "Python", "license": "other", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen", "custom_code", "zh", "en", "arxiv:2309.16609", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 5, "context_window_tokens": 4096, "parameters_total_b": 7, "parameters_active_b": 7, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwq-32b-awq", "name": "Qwq 32B Awq", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/QwQ-32B-AWQ", "description": "Open source model Qwen/QwQ-32B-AWQ. 
133 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 133, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "safetensors", "qwen2", "chat", "conversational", "en", "arxiv:2309.00071", "arxiv:2412.15115", "base_model:Qwen/QwQ-32B", "base_model:quantized:Qwen/QwQ-32B", "4-bit", "awq", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 22, "context_window_tokens": 4096, "parameters_total_b": 32, "parameters_active_b": 32, "is_multimodal": false }, "referral_url": "" }, { "slug": "deepseek-r1-distill-llama-70b", "name": "Deepseek R1 Distill Llama 70B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B", "description": "Open source model deepseek-ai/DeepSeek-R1-Distill-Llama-70B. 741 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 741, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "safetensors", "llama", "conversational", "arxiv:2501.12948", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 49, "context_window_tokens": 4096, "parameters_total_b": 70, "parameters_active_b": 70, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen2.5-coder-7b", "name": "Qwen2.5 Coder 7B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B", "description": "Open source model Qwen/Qwen2.5-Coder-7B. 
134 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 134, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen2", "code", "qwen", "qwen-coder", "codeqwen", "conversational", "en", "arxiv:2409.12186", "arxiv:2309.00071", "arxiv:2407.10671", "base_model:Qwen/Qwen2.5-7B", "base_model:finetune:Qwen/Qwen2.5-7B", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 5, "context_window_tokens": 4096, "parameters_total_b": 7, "parameters_active_b": 7, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen2.5-3b", "name": "Qwen2.5 3B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen2.5-3B", "description": "Open source model Qwen/Qwen2.5-3B. 169 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 169, "language": "Python", "license": "other", "tags": [ "AI", "LLM", "safetensors", "qwen2", "conversational", "en", "arxiv:2407.10671", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 2, "context_window_tokens": 4096, "parameters_total_b": 3, "parameters_active_b": 3, "is_multimodal": false }, "referral_url": "" }, { "slug": "deepseek-v2-lite-chat", "name": "Deepseek V2 Lite Chat", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/deepseek-ai/DeepSeek-V2-Lite-Chat", "description": "Open source model deepseek-ai/DeepSeek-V2-Lite-Chat. 
133 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 133, "language": "Python", "license": "other", "tags": [ "AI", "LLM", "transformers", "safetensors", "deepseek_v2", "conversational", "custom_code", "arxiv:2405.04434", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "tiny-qwen3forcausallm", "name": "Tiny Qwen3Forcausallm", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/trl-internal-testing/tiny-Qwen3ForCausalLM", "description": "Open source model trl-internal-testing/tiny-Qwen3ForCausalLM. 1 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 1, "language": "Python", "license": "unknown", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3", "trl", "conversational", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "deepseek-coder-v2-lite-instruct", "name": "Deepseek Coder V2 Lite Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct", "description": "Open source model deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct. 
539 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 539, "language": "Python", "license": "other", "tags": [ "AI", "LLM", "transformers", "safetensors", "deepseek_v2", "conversational", "custom_code", "arxiv:2401.06066", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen3-0.6b-base", "name": "Qwen3 0.6B Base", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen3-0.6B-Base", "description": "Open source model Qwen/Qwen3-0.6B-Base. 146 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 146, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen3", "conversational", "arxiv:2505.09388", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 4, "context_window_tokens": 4096, "parameters_total_b": 6, "parameters_active_b": 6, "is_multimodal": false }, "referral_url": "" }, { "slug": "diffractgpt_mistral_chemical_formula", "name": "Diffractgpt_Mistral_Chemical_Formula", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/knc6/diffractgpt_mistral_chemical_formula", "description": "Open source model knc6/diffractgpt_mistral_chemical_formula. 
1 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 1, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "peft", "safetensors", "chemistry", "text-generation-inference", "atomgpt", "diffraction", "en", "base_model:unsloth/mistral-7b-bnb-4bit", "base_model:adapter:unsloth/mistral-7b-bnb-4bit", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "qwen-7b-chat", "name": "Qwen 7B Chat", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Qwen/Qwen-7B-Chat", "description": "Open source model Qwen/Qwen-7B-Chat. 787 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 787, "language": "Python", "license": "other", "tags": [ "AI", "LLM", "transformers", "safetensors", "qwen", "custom_code", "zh", "en", "arxiv:2309.16609", "arxiv:2305.08322", "arxiv:2009.03300", "arxiv:2305.05280", "arxiv:2210.03629", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 5, "context_window_tokens": 4096, "parameters_total_b": 7, "parameters_active_b": 7, "is_multimodal": false }, "referral_url": "" }, { "slug": "nvidia-nemotron-3-nano-30b-a3b-nvfp4", "name": "Nvidia Nemotron 3 Nano 30B A3B Nvfp4", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4", "description": "Open source model nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4. 
100 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 100, "language": "Python", "license": "other", "tags": [ "AI", "LLM", "transformers", "safetensors", "nemotron_h", "feature-extraction", "nvidia", "pytorch", "conversational", "custom_code", "en", "es", "fr", "de", "ja", "it", "dataset:nvidia/Nemotron-Pretraining-Code-v1", "dataset:nvidia/Nemotron-CC-v2", "dataset:nvidia/Nemotron-Pretraining-SFT-v1", "dataset:nvidia/Nemotron-CC-Math-v1", "dataset:nvidia/Nemotron-Pretraining-Code-v2", "dataset:nvidia/Nemotron-Pretraining-Specialized-v1", "dataset:nvidia/Nemotron-CC-v2.1", "dataset:nvidia/Nemotron-CC-Code-v1", "dataset:nvidia/Nemotron-Pretraining-Dataset-sample", "dataset:nvidia/Nemotron-Competitive-Programming-v1", "dataset:nvidia/Nemotron-Math-v2", "dataset:nvidia/Nemotron-Agentic-v1", "dataset:nvidia/Nemotron-Math-Proofs-v1", "dataset:nvidia/Nemotron-Instruction-Following-Chat-v1", "dataset:nvidia/Nemotron-Science-v1", "dataset:nvidia/Nemotron-3-Nano-RL-Training-Blend", "arxiv:2512.20848", "arxiv:2512.20856", "arxiv:2601.20088", "base_model:nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "base_model:quantized:nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "region:us" ], "hardware_req": "24GB+ VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 21, "context_window_tokens": 4096, "parameters_total_b": 30, "parameters_active_b": 30, "is_multimodal": false }, "referral_url": "" }, { "slug": "falcon-h1-tiny-90m-instruct", "name": "Falcon H1 Tiny 90M Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/tiiuae/Falcon-H1-Tiny-90M-Instruct", "description": "Open source model tiiuae/Falcon-H1-Tiny-90M-Instruct. 
31 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 31, "language": "Python", "license": "other", "tags": [ "AI", "LLM", "transformers", "safetensors", "falcon_h1", "falcon-h1", "edge", "conversational", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "hermes-3-llama-3.2-3b", "name": "Hermes 3 Llama 3.2 3B", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/NousResearch/Hermes-3-Llama-3.2-3B", "description": "Open source model NousResearch/Hermes-3-Llama-3.2-3B. 174 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 174, "language": "Python", "license": "llama3", "tags": [ "AI", "LLM", "transformers", "safetensors", "llama", "Llama-3", "instruct", "finetune", "chatml", "gpt4", "synthetic data", "distillation", "function calling", "json mode", "axolotl", "roleplaying", "chat", "conversational", "en", "arxiv:2408.11857", "text-generation-inference", "endpoints_compatible", "deploy:azure", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 2, "context_window_tokens": 4096, "parameters_total_b": 3, "parameters_active_b": 3, "is_multimodal": false }, "referral_url": "" }, { "slug": "meta-llama-3.1-8b-instruct", "name": "Meta Llama 3.1 8B Instruct", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/unsloth/Meta-Llama-3.1-8B-Instruct", "description": "Open source model unsloth/Meta-Llama-3.1-8B-Instruct. 
94 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 94, "language": "Python", "license": "llama3.1", "tags": [ "AI", "LLM", "transformers", "safetensors", "llama", "llama-3", "meta", "facebook", "unsloth", "conversational", "en", "base_model:meta-llama/Llama-3.1-8B-Instruct", "base_model:finetune:meta-llama/Llama-3.1-8B-Instruct", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 6, "context_window_tokens": 4096, "parameters_total_b": 8, "parameters_active_b": 8, "is_multimodal": false }, "referral_url": "" }, { "slug": "meta-llama-3.1-8b-instruct-gguf", "name": "Meta Llama 3.1 8B Instruct Gguf", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF", "description": "Open source model bartowski/Meta-Llama-3.1-8B-Instruct-GGUF. 321 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 321, "language": "Python", "license": "llama3.1", "tags": [ "AI", "LLM", "gguf", "facebook", "meta", "pytorch", "llama", "llama-3", "en", "de", "fr", "it", "pt", "hi", "es", "th", "base_model:meta-llama/Llama-3.1-8B-Instruct", "base_model:quantized:meta-llama/Llama-3.1-8B-Instruct", "endpoints_compatible", "region:us", "conversational" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 6, "context_window_tokens": 4096, "parameters_total_b": 8, "parameters_active_b": 8, "is_multimodal": false }, "referral_url": "" }, { "slug": "deepseek-v3-0324", "name": "Deepseek V3 0324", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/deepseek-ai/DeepSeek-V3-0324", "description": "Open source model deepseek-ai/DeepSeek-V3-0324. 
3087 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 3087, "language": "Python", "license": "mit", "tags": [ "AI", "LLM", "transformers", "safetensors", "deepseek_v3", "conversational", "custom_code", "arxiv:2412.19437", "eval-results", "text-generation-inference", "endpoints_compatible", "fp8", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "elm", "name": "Elm", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/Joaoffg/ELM", "description": "Open source model Joaoffg/ELM. 2 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 2, "language": "Python", "license": "llama2", "tags": [ "AI", "LLM", "transformers", "safetensors", "llama", "academic", "university", "en", "nl", "arxiv:2408.06931", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" }, { "slug": "llama-2-13b-chat-hf", "name": "Llama 2 13B Chat Hf", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/meta-llama/Llama-2-13b-chat-hf", "description": "Open source model meta-llama/Llama-2-13b-chat-hf. 
1109 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 1109, "language": "Python", "license": "llama2", "tags": [ "AI", "LLM", "transformers", "pytorch", "safetensors", "llama", "facebook", "meta", "llama-2", "conversational", "en", "arxiv:2307.09288", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "16GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 9, "context_window_tokens": 4096, "parameters_total_b": 13, "parameters_active_b": 13, "is_multimodal": false }, "referral_url": "" }, { "slug": "svara-tts-v1", "name": "Svara Tts V1", "category": "AI Models", "is_open_source": true, "website": "https://huggingface.co/kenpath/svara-tts-v1", "description": "Open source model kenpath/svara-tts-v1. 18 likes on Hugging Face.", "pros": [ "Open Source", "Running Locally" ], "cons": [ "Requires GPU" ], "stars": 18, "language": "Python", "license": "apache-2.0", "tags": [ "AI", "LLM", "transformers", "safetensors", "llama", "text-to-speech", "speech-synthesis", "multilingual", "indic", "orpheus", "lora", "low-latency", "gguf", "zero-shot", "emotions", "discrete-audio-tokens", "hi", "bn", "mr", "te", "kn", "bho", "mag", "hne", "mai", "as", "brx", "doi", "gu", "ml", "pa", "ta", "ne", "sa", "en", "dataset:SYSPIN", "dataset:RASA", "dataset:IndicTTS", "dataset:SPICOR", "base_model:canopylabs/3b-hi-ft-research_release", "base_model:adapter:canopylabs/3b-hi-ft-research_release", "text-generation-inference", "endpoints_compatible", "region:us" ], "hardware_req": "8GB VRAM", "hosting_type": "self-hosted", "ai_metadata": { "vram_inference_gb": 1, "context_window_tokens": 4096, "parameters_total_b": 0, "parameters_active_b": 0, "is_multimodal": false }, "referral_url": "" } ]