Files
altstack-data/data/tools_expanded.json
2026-02-25 22:36:27 +05:30

9193 lines
220 KiB
JSON

[
{
"slug": "qwen2.5-7b-instruct",
"name": "Qwen2.5 7B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct",
"description": "Open source model Qwen/Qwen2.5-7B-Instruct. 1073 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1073,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"chat",
"conversational",
"en",
"arxiv:2309.00071",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-7B",
"base_model:finetune:Qwen/Qwen2.5-7B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-0.6b",
"name": "Qwen3 0.6B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-0.6B",
"description": "Open source model Qwen/Qwen3-0.6B. 1083 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1083,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-0.6B-Base",
"base_model:finetune:Qwen/Qwen3-0.6B-Base",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 6,
"parameters_active_b": 6,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "gpt2",
"name": "Gpt2",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/openai-community/gpt2",
"description": "Open source model openai-community/gpt2. 3114 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 3114,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"tf",
"jax",
"tflite",
"rust",
"onnx",
"safetensors",
"gpt2",
"exbert",
"en",
"doi:10.57967/hf/0039",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-1.5b-instruct",
"name": "Qwen2.5 1.5B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct",
"description": "Open source model Qwen/Qwen2.5-1.5B-Instruct. 617 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 617,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"chat",
"conversational",
"en",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-1.5B",
"base_model:finetune:Qwen/Qwen2.5-1.5B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 5,
"parameters_active_b": 5,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-3b-instruct",
"name": "Qwen2.5 3B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-3B-Instruct",
"description": "Open source model Qwen/Qwen2.5-3B-Instruct. 404 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 404,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"chat",
"conversational",
"en",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-3B",
"base_model:finetune:Qwen/Qwen2.5-3B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 2,
"context_window_tokens": 4096,
"parameters_total_b": 3,
"parameters_active_b": 3,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-3.1-8b-instruct",
"name": "Llama 3.1 8B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct",
"description": "Open source model meta-llama/Llama-3.1-8B-Instruct. 5467 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 5467,
"language": "Python",
"license": "llama3.1",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"facebook",
"meta",
"pytorch",
"llama-3",
"conversational",
"en",
"de",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"arxiv:2204.05149",
"base_model:meta-llama/Llama-3.1-8B",
"base_model:finetune:meta-llama/Llama-3.1-8B",
"eval-results",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "gpt-oss-20b",
"name": "Gpt Oss 20B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/openai/gpt-oss-20b",
"description": "Open source model openai/gpt-oss-20b. 4378 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 4378,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"gpt_oss",
"vllm",
"conversational",
"arxiv:2508.10925",
"endpoints_compatible",
"8-bit",
"mxfp4",
"deploy:azure",
"region:us"
],
"hardware_req": "16GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 14,
"context_window_tokens": 4096,
"parameters_total_b": 20,
"parameters_active_b": 20,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-0.5b-instruct",
"name": "Qwen2.5 0.5B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct",
"description": "Open source model Qwen/Qwen2.5-0.5B-Instruct. 463 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 463,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"chat",
"conversational",
"en",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-0.5B",
"base_model:finetune:Qwen/Qwen2.5-0.5B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 5,
"parameters_active_b": 5,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-4b",
"name": "Qwen3 4B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-4B",
"description": "Open source model Qwen/Qwen3-4B. 552 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 552,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2309.00071",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-4B-Base",
"base_model:finetune:Qwen/Qwen3-4B-Base",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 3,
"context_window_tokens": 4096,
"parameters_total_b": 4,
"parameters_active_b": 4,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-8b",
"name": "Qwen3 8B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-8B",
"description": "Open source model Qwen/Qwen3-8B. 940 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 940,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2309.00071",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-8B-Base",
"base_model:finetune:Qwen/Qwen3-8B-Base",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-32b-instruct",
"name": "Qwen2.5 32B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-32B-Instruct",
"description": "Open source model Qwen/Qwen2.5-32B-Instruct. 328 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 328,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"chat",
"conversational",
"en",
"arxiv:2309.00071",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-32B",
"base_model:finetune:Qwen/Qwen2.5-32B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 22,
"context_window_tokens": 4096,
"parameters_total_b": 32,
"parameters_active_b": 32,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "opt-125m",
"name": "Opt 125M",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/facebook/opt-125m",
"description": "Open source model facebook/opt-125m. 233 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 233,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"tf",
"jax",
"opt",
"en",
"arxiv:2205.01068",
"arxiv:2005.14165",
"text-generation-inference",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-1.7b",
"name": "Qwen3 1.7B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-1.7B",
"description": "Open source model Qwen/Qwen3-1.7B. 422 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 422,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-1.7B-Base",
"base_model:finetune:Qwen/Qwen3-1.7B-Base",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "tiny-qwen2forcausallm-2.5",
"name": "Tiny Qwen2Forcausallm 2.5",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
"description": "Open source model trl-internal-testing/tiny-Qwen2ForCausalLM-2.5. 3 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 3,
"language": "Python",
"license": "unknown",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"trl",
"conversational",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "dolphin-2.9.1-yi-1.5-34b",
"name": "Dolphin 2.9.1 Yi 1.5 34B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/dphn/dolphin-2.9.1-yi-1.5-34b",
"description": "Open source model dphn/dolphin-2.9.1-yi-1.5-34b. 54 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 54,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"generated_from_trainer",
"axolotl",
"conversational",
"dataset:cognitivecomputations/Dolphin-2.9",
"dataset:teknium/OpenHermes-2.5",
"dataset:m-a-p/CodeFeedback-Filtered-Instruction",
"dataset:cognitivecomputations/dolphin-coder",
"dataset:cognitivecomputations/samantha-data",
"dataset:microsoft/orca-math-word-problems-200k",
"dataset:Locutusque/function-calling-chatml",
"dataset:internlm/Agent-FLAN",
"base_model:01-ai/Yi-1.5-34B",
"base_model:finetune:01-ai/Yi-1.5-34B",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 24,
"context_window_tokens": 4096,
"parameters_total_b": 34,
"parameters_active_b": 34,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-embedding-0.6b",
"name": "Qwen3 Embedding 0.6B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-Embedding-0.6B",
"description": "Open source model Qwen/Qwen3-Embedding-0.6B. 879 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 879,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"sentence-transformers",
"safetensors",
"qwen3",
"transformers",
"sentence-similarity",
"feature-extraction",
"text-embeddings-inference",
"arxiv:2506.05176",
"base_model:Qwen/Qwen3-0.6B-Base",
"base_model:finetune:Qwen/Qwen3-0.6B-Base",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 6,
"parameters_active_b": 6,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "gpt-oss-120b",
"name": "Gpt Oss 120B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/openai/gpt-oss-120b",
"description": "Open source model openai/gpt-oss-120b. 4503 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 4503,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"gpt_oss",
"vllm",
"conversational",
"arxiv:2508.10925",
"endpoints_compatible",
"8-bit",
"mxfp4",
"deploy:azure",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 84,
"context_window_tokens": 4096,
"parameters_total_b": 120,
"parameters_active_b": 120,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-4b-instruct-2507",
"name": "Qwen3 4B Instruct 2507",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507",
"description": "Open source model Qwen/Qwen3-4B-Instruct-2507. 730 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 730,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2505.09388",
"eval-results",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 3,
"context_window_tokens": 4096,
"parameters_total_b": 4,
"parameters_active_b": 4,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "moondream2",
"name": "Moondream2",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/vikhyatk/moondream2",
"description": "Open source model vikhyatk/moondream2. 1373 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1373,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"moondream1",
"image-text-to-text",
"custom_code",
"doi:10.57967/hf/6762",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-3.2-1b-instruct",
"name": "Llama 3.2 1B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct",
"description": "Open source model meta-llama/Llama-3.2-1B-Instruct. 1292 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1292,
"language": "Python",
"license": "llama3.2",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"facebook",
"meta",
"pytorch",
"llama-3",
"conversational",
"en",
"de",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"arxiv:2204.05149",
"arxiv:2405.16406",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 1,
"parameters_active_b": 1,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2-1.5b-instruct",
"name": "Qwen2 1.5B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2-1.5B-Instruct",
"description": "Open source model Qwen/Qwen2-1.5B-Instruct. 158 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 158,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"chat",
"conversational",
"en",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 5,
"parameters_active_b": 5,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-coder-0.5b-instruct",
"name": "Qwen2.5 Coder 0.5B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B-Instruct",
"description": "Open source model Qwen/Qwen2.5-Coder-0.5B-Instruct. 64 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 64,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"code",
"codeqwen",
"chat",
"qwen",
"qwen-coder",
"conversational",
"en",
"arxiv:2409.12186",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-Coder-0.5B",
"base_model:finetune:Qwen/Qwen2.5-Coder-0.5B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 5,
"parameters_active_b": 5,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "kimi-k2.5",
"name": "Kimi K2.5",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/mlx-community/Kimi-K2.5",
"description": "Open source model mlx-community/Kimi-K2.5. 28 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 28,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"mlx",
"safetensors",
"kimi_k25",
"conversational",
"custom_code",
"base_model:moonshotai/Kimi-K2.5",
"base_model:quantized:moonshotai/Kimi-K2.5",
"4-bit",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "mistral-7b-instruct-v0.2",
"name": "Mistral 7B Instruct V0.2",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2",
"description": "Open source model mistralai/Mistral-7B-Instruct-v0.2. 3075 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 3075,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"safetensors",
"mistral",
"finetuned",
"mistral-common",
"conversational",
"arxiv:2310.06825",
"text-generation-inference",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-30b-a3b-instruct-2507",
"name": "Qwen3 30B A3B Instruct 2507",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-30B-A3B-Instruct-2507",
"description": "Open source model Qwen/Qwen3-30B-A3B-Instruct-2507. 766 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 766,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3_moe",
"conversational",
"arxiv:2402.17463",
"arxiv:2407.02490",
"arxiv:2501.15383",
"arxiv:2404.06654",
"arxiv:2505.09388",
"eval-results",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 21,
"context_window_tokens": 4096,
"parameters_total_b": 30,
"parameters_active_b": 30,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llm-jp-3-3.7b-instruct",
"name": "Llm Jp 3 3.7B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/llm-jp/llm-jp-3-3.7b-instruct",
"description": "Open source model llm-jp/llm-jp-3-3.7b-instruct. 13 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 13,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"conversational",
"en",
"ja",
"text-generation-inference",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-3.2-3b-instruct",
"name": "Llama 3.2 3B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct",
"description": "Open source model meta-llama/Llama-3.2-3B-Instruct. 1986 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1986,
"language": "Python",
"license": "llama3.2",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"facebook",
"meta",
"pytorch",
"llama-3",
"conversational",
"en",
"de",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"arxiv:2204.05149",
"arxiv:2405.16406",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 2,
"context_window_tokens": 4096,
"parameters_total_b": 3,
"parameters_active_b": 3,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "distilgpt2",
"name": "Distilgpt2",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/distilbert/distilgpt2",
"description": "Open source model distilbert/distilgpt2. 609 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 609,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"tf",
"jax",
"tflite",
"rust",
"coreml",
"safetensors",
"gpt2",
"exbert",
"en",
"dataset:openwebtext",
"arxiv:1910.01108",
"arxiv:2201.08542",
"arxiv:2203.12574",
"arxiv:1910.09700",
"arxiv:1503.02531",
"model-index",
"co2_eq_emissions",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-embedding-8b",
"name": "Qwen3 Embedding 8B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-Embedding-8B",
"description": "Open source model Qwen/Qwen3-Embedding-8B. 584 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 584,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"sentence-transformers",
"safetensors",
"qwen3",
"transformers",
"sentence-similarity",
"feature-extraction",
"text-embeddings-inference",
"arxiv:2506.05176",
"base_model:Qwen/Qwen3-8B-Base",
"base_model:finetune:Qwen/Qwen3-8B-Base",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "meta-llama-3-8b",
"name": "Meta Llama 3 8B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/meta-llama/Meta-Llama-3-8B",
"description": "Open source model meta-llama/Meta-Llama-3-8B. 6458 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 6458,
"language": "Python",
"license": "llama3",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"facebook",
"meta",
"pytorch",
"llama-3",
"en",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "tinyllama-1.1b-chat-v1.0",
"name": "Tinyllama 1.1B Chat V1.0",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0",
"description": "Open source model TinyLlama/TinyLlama-1.1B-Chat-v1.0. 1526 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1526,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"conversational",
"en",
"dataset:cerebras/SlimPajama-627B",
"dataset:bigcode/starcoderdata",
"dataset:HuggingFaceH4/ultrachat_200k",
"dataset:HuggingFaceH4/ultrafeedback_binarized",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 1,
"parameters_active_b": 1,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "glm-4.7-flash",
"name": "Glm 4.7 Flash",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/zai-org/GLM-4.7-Flash",
"description": "Open source model zai-org/GLM-4.7-Flash. 1538 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1538,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"glm4_moe_lite",
"conversational",
"en",
"zh",
"arxiv:2508.06471",
"eval-results",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-3.2-1b",
"name": "Llama 3.2 1B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/meta-llama/Llama-3.2-1B",
"description": "Open source model meta-llama/Llama-3.2-1B. 2295 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 2295,
"language": "Python",
"license": "llama3.2",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"facebook",
"meta",
"pytorch",
"llama-3",
"en",
"de",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"arxiv:2204.05149",
"arxiv:2405.16406",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 1,
"parameters_active_b": 1,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-32b",
"name": "Qwen3 32B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-32B",
"description": "Open source model Qwen/Qwen3-32B. 656 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 656,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2309.00071",
"arxiv:2505.09388",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 22,
"context_window_tokens": 4096,
"parameters_total_b": 32,
"parameters_active_b": 32,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-3.2-1b-instruct-fp8-dynamic",
"name": "Llama 3.2 1B Instruct Fp8 Dynamic",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/RedHatAI/Llama-3.2-1B-Instruct-FP8-dynamic",
"description": "Open source model RedHatAI/Llama-3.2-1B-Instruct-FP8-dynamic. 3 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 3,
"language": "Python",
"license": "llama3.2",
"tags": [
"AI",
"LLM",
"safetensors",
"llama",
"fp8",
"vllm",
"conversational",
"en",
"de",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"base_model:meta-llama/Llama-3.2-1B-Instruct",
"base_model:quantized:meta-llama/Llama-3.2-1B-Instruct",
"compressed-tensors",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 1,
"parameters_active_b": 1,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-coder-1.5b-instruct",
"name": "Qwen2.5 Coder 1.5B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct",
"description": "Open source model Qwen/Qwen2.5-Coder-1.5B-Instruct. 106 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 106,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"code",
"codeqwen",
"chat",
"qwen",
"qwen-coder",
"conversational",
"en",
"arxiv:2409.12186",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-Coder-1.5B",
"base_model:finetune:Qwen/Qwen2.5-Coder-1.5B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 5,
"parameters_active_b": 5,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "meta-llama-3-8b-instruct",
"name": "Meta Llama 3 8B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct",
"description": "Open source model meta-llama/Meta-Llama-3-8B-Instruct. 4380 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 4380,
"language": "Python",
"license": "llama3",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"facebook",
"meta",
"pytorch",
"llama-3",
"conversational",
"en",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "gemma-3-1b-it",
"name": "Gemma 3 1B It",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/google/gemma-3-1b-it",
"description": "Open source model google/gemma-3-1b-it. 842 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 842,
"language": "Python",
"license": "gemma",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"gemma3_text",
"conversational",
"arxiv:1905.07830",
"arxiv:1905.10044",
"arxiv:1911.11641",
"arxiv:1904.09728",
"arxiv:1705.03551",
"arxiv:1911.01547",
"arxiv:1907.10641",
"arxiv:1903.00161",
"arxiv:2009.03300",
"arxiv:2304.06364",
"arxiv:2103.03874",
"arxiv:2110.14168",
"arxiv:2311.12022",
"arxiv:2108.07732",
"arxiv:2107.03374",
"arxiv:2210.03057",
"arxiv:2106.03193",
"arxiv:1910.11856",
"arxiv:2502.12404",
"arxiv:2502.21228",
"arxiv:2404.16816",
"arxiv:2104.12756",
"arxiv:2311.16502",
"arxiv:2203.10244",
"arxiv:2404.12390",
"arxiv:1810.12440",
"arxiv:1908.02660",
"arxiv:2312.11805",
"base_model:google/gemma-3-1b-pt",
"base_model:finetune:google/gemma-3-1b-pt",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 1,
"parameters_active_b": 1,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "phi-2",
"name": "Phi 2",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/microsoft/phi-2",
"description": "Open source model microsoft/phi-2. 3425 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 3425,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"phi",
"nlp",
"code",
"en",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-coder-7b-instruct",
"name": "Qwen2.5 Coder 7B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct",
"description": "Open source model Qwen/Qwen2.5-Coder-7B-Instruct. 646 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 646,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"code",
"codeqwen",
"chat",
"qwen",
"qwen-coder",
"conversational",
"en",
"arxiv:2409.12186",
"arxiv:2309.00071",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-Coder-7B",
"base_model:finetune:Qwen/Qwen2.5-Coder-7B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-7b",
"name": "Qwen2.5 7B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-7B",
"description": "Open source model Qwen/Qwen2.5-7B. 264 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 264,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"conversational",
"en",
"arxiv:2407.10671",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "deepseek-r1-distill-qwen-1.5b",
"name": "Deepseek R1 Distill Qwen 1.5B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
"description": "Open source model deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B. 1446 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1446,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"conversational",
"arxiv:2501.12948",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 5,
"parameters_active_b": 5,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "deepseek-v3",
"name": "Deepseek V3",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/deepseek-ai/DeepSeek-V3",
"description": "Open source model deepseek-ai/DeepSeek-V3. 4024 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 4024,
"language": "Python",
"license": "unknown",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"deepseek_v3",
"conversational",
"custom_code",
"arxiv:2412.19437",
"eval-results",
"text-generation-inference",
"endpoints_compatible",
"fp8",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "gpt2-large",
"name": "Gpt2 Large",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/openai-community/gpt2-large",
"description": "Open source model openai-community/gpt2-large. 344 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 344,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"tf",
"jax",
"rust",
"onnx",
"safetensors",
"gpt2",
"en",
"arxiv:1910.09700",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "glm-4.7-flash-mlx-8bit",
"name": "Glm 4.7 Flash Mlx 8Bit",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/lmstudio-community/GLM-4.7-Flash-MLX-8bit",
"description": "Open source model lmstudio-community/GLM-4.7-Flash-MLX-8bit. 9 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 9,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"glm4_moe_lite",
"mlx",
"conversational",
"en",
"zh",
"base_model:zai-org/GLM-4.7-Flash",
"base_model:quantized:zai-org/GLM-4.7-Flash",
"endpoints_compatible",
"8-bit",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "glm-4.7-flash-mlx-6bit",
"name": "Glm 4.7 Flash Mlx 6Bit",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/lmstudio-community/GLM-4.7-Flash-MLX-6bit",
"description": "Open source model lmstudio-community/GLM-4.7-Flash-MLX-6bit. 7 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 7,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"glm4_moe_lite",
"mlx",
"conversational",
"en",
"zh",
"base_model:zai-org/GLM-4.7-Flash",
"base_model:quantized:zai-org/GLM-4.7-Flash",
"endpoints_compatible",
"6-bit",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 6,
"parameters_active_b": 6,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-0.6b-fp8",
"name": "Qwen3 0.6B Fp8",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-0.6B-FP8",
"description": "Open source model Qwen/Qwen3-0.6B-FP8. 56 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 56,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-0.6B",
"base_model:quantized:Qwen/Qwen3-0.6B",
"text-generation-inference",
"endpoints_compatible",
"fp8",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 6,
"parameters_active_b": 6,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-3.1-8b",
"name": "Llama 3.1 8B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/meta-llama/Llama-3.1-8B",
"description": "Open source model meta-llama/Llama-3.1-8B. 2065 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 2065,
"language": "Python",
"license": "llama3.1",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"facebook",
"meta",
"pytorch",
"llama-3",
"en",
"de",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"arxiv:2204.05149",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "pythia-160m",
"name": "Pythia 160M",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/EleutherAI/pythia-160m",
"description": "Open source model EleutherAI/pythia-160m. 38 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 38,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"safetensors",
"gpt_neox",
"causal-lm",
"pythia",
"en",
"dataset:EleutherAI/pile",
"arxiv:2304.01373",
"arxiv:2101.00027",
"arxiv:2201.07311",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "deepseek-r1-distill-qwen-32b",
"name": "Deepseek R1 Distill Qwen 32B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
"description": "Open source model deepseek-ai/DeepSeek-R1-Distill-Qwen-32B. 1517 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1517,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"conversational",
"arxiv:2501.12948",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 22,
"context_window_tokens": 4096,
"parameters_total_b": 32,
"parameters_active_b": 32,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "hunyuanocr",
"name": "Hunyuanocr",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/tencent/HunyuanOCR",
"description": "Open source model tencent/HunyuanOCR. 553 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 553,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"hunyuan_vl",
"ocr",
"hunyuan",
"vision-language",
"image-to-text",
"1B",
"end-to-end",
"image-text-to-text",
"conversational",
"multilingual",
"arxiv:2511.19575",
"base_model:tencent/HunyuanOCR",
"base_model:finetune:tencent/HunyuanOCR",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-30b-a3b",
"name": "Qwen3 30B A3B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-30B-A3B",
"description": "Open source model Qwen/Qwen3-30B-A3B. 855 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 855,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3_moe",
"conversational",
"arxiv:2309.00071",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-30B-A3B-Base",
"base_model:finetune:Qwen/Qwen3-30B-A3B-Base",
"endpoints_compatible",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 21,
"context_window_tokens": 4096,
"parameters_total_b": 30,
"parameters_active_b": 30,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-0.5b",
"name": "Qwen2.5 0.5B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-0.5B",
"description": "Open source model Qwen/Qwen2.5-0.5B. 372 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 372,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"conversational",
"en",
"arxiv:2407.10671",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 5,
"parameters_active_b": 5,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-32b-instruct-awq",
"name": "Qwen2.5 32B Instruct Awq",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-32B-Instruct-AWQ",
"description": "Open source model Qwen/Qwen2.5-32B-Instruct-AWQ. 94 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 94,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"chat",
"conversational",
"en",
"arxiv:2309.00071",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-32B-Instruct",
"base_model:quantized:Qwen/Qwen2.5-32B-Instruct",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"awq",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 22,
"context_window_tokens": 4096,
"parameters_total_b": 32,
"parameters_active_b": 32,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "nvidia-nemotron-3-nano-30b-a3b-fp8",
"name": "Nvidia Nemotron 3 Nano 30B A3B Fp8",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8",
"description": "Open source model nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8. 284 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 284,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"nemotron_h",
"feature-extraction",
"nvidia",
"pytorch",
"conversational",
"custom_code",
"en",
"es",
"fr",
"de",
"ja",
"it",
"dataset:nvidia/Nemotron-Pretraining-Code-v1",
"dataset:nvidia/Nemotron-CC-v2",
"dataset:nvidia/Nemotron-Pretraining-SFT-v1",
"dataset:nvidia/Nemotron-CC-Math-v1",
"dataset:nvidia/Nemotron-Pretraining-Code-v2",
"dataset:nvidia/Nemotron-Pretraining-Specialized-v1",
"dataset:nvidia/Nemotron-CC-v2.1",
"dataset:nvidia/Nemotron-CC-Code-v1",
"dataset:nvidia/Nemotron-Pretraining-Dataset-sample",
"dataset:nvidia/Nemotron-Competitive-Programming-v1",
"dataset:nvidia/Nemotron-Math-v2",
"dataset:nvidia/Nemotron-Agentic-v1",
"dataset:nvidia/Nemotron-Math-Proofs-v1",
"dataset:nvidia/Nemotron-Instruction-Following-Chat-v1",
"dataset:nvidia/Nemotron-Science-v1",
"dataset:nvidia/Nemotron-3-Nano-RL-Training-Blend",
"arxiv:2512.20848",
"arxiv:2512.20856",
"base_model:nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
"base_model:quantized:nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
"eval-results",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 21,
"context_window_tokens": 4096,
"parameters_total_b": 30,
"parameters_active_b": 30,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-14b-instruct",
"name": "Qwen2.5 14B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-14B-Instruct",
"description": "Open source model Qwen/Qwen2.5-14B-Instruct. 312 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 312,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"chat",
"conversational",
"en",
"arxiv:2309.00071",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-14B",
"base_model:finetune:Qwen/Qwen2.5-14B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "16GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 10,
"context_window_tokens": 4096,
"parameters_total_b": 14,
"parameters_active_b": 14,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "nvidia-nemotron-3-nano-30b-a3b-bf16",
"name": "Nvidia Nemotron 3 Nano 30B A3B Bf16",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
"description": "Open source model nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16. 634 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 634,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"nemotron_h",
"feature-extraction",
"nvidia",
"pytorch",
"conversational",
"custom_code",
"en",
"es",
"fr",
"de",
"ja",
"it",
"dataset:nvidia/Nemotron-Pretraining-Code-v1",
"dataset:nvidia/Nemotron-CC-v2",
"dataset:nvidia/Nemotron-Pretraining-SFT-v1",
"dataset:nvidia/Nemotron-CC-Math-v1",
"dataset:nvidia/Nemotron-Pretraining-Code-v2",
"dataset:nvidia/Nemotron-Pretraining-Specialized-v1",
"dataset:nvidia/Nemotron-CC-v2.1",
"dataset:nvidia/Nemotron-CC-Code-v1",
"dataset:nvidia/Nemotron-Pretraining-Dataset-sample",
"dataset:nvidia/Nemotron-Competitive-Programming-v1",
"dataset:nvidia/Nemotron-Math-v2",
"dataset:nvidia/Nemotron-Agentic-v1",
"dataset:nvidia/Nemotron-Math-Proofs-v1",
"dataset:nvidia/Nemotron-Instruction-Following-Chat-v1",
"dataset:nvidia/Nemotron-Science-v1",
"dataset:nvidia/Nemotron-3-Nano-RL-Training-Blend",
"arxiv:2512.20848",
"arxiv:2512.20856",
"eval-results",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 21,
"context_window_tokens": 4096,
"parameters_total_b": 30,
"parameters_active_b": 30,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "openelm-1_1b-instruct",
"name": "Openelm 1_1B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/apple/OpenELM-1_1B-Instruct",
"description": "Open source model apple/OpenELM-1_1B-Instruct. 72 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 72,
"language": "Python",
"license": "apple-amlr",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"openelm",
"custom_code",
"arxiv:2404.14619",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 1,
"parameters_active_b": 1,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "tiny-random-llamaforcausallm",
"name": "Tiny Random Llamaforcausallm",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/hmellor/tiny-random-LlamaForCausalLM",
"description": "Open source model hmellor/tiny-random-LlamaForCausalLM. 0 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 0,
"language": "Python",
"license": "unknown",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"conversational",
"arxiv:1910.09700",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-next-80b-a3b-instruct",
"name": "Qwen3 Next 80B A3B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-Next-80B-A3B-Instruct",
"description": "Open source model Qwen/Qwen3-Next-80B-A3B-Instruct. 937 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 937,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3_next",
"conversational",
"arxiv:2309.00071",
"arxiv:2404.06654",
"arxiv:2505.09388",
"arxiv:2501.15383",
"eval-results",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 56,
"context_window_tokens": 4096,
"parameters_total_b": 80,
"parameters_active_b": 80,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "h2ovl-mississippi-800m",
"name": "H2Ovl Mississippi 800M",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/h2oai/h2ovl-mississippi-800m",
"description": "Open source model h2oai/h2ovl-mississippi-800m. 39 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 39,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"h2ovl_chat",
"feature-extraction",
"gpt",
"llm",
"multimodal large language model",
"ocr",
"conversational",
"custom_code",
"en",
"arxiv:2410.13611",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "bloomz-560m",
"name": "Bloomz 560M",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/bigscience/bloomz-560m",
"description": "Open source model bigscience/bloomz-560m. 137 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 137,
"language": "Python",
"license": "bigscience-bloom-rail-1.0",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"tensorboard",
"safetensors",
"bloom",
"ak",
"ar",
"as",
"bm",
"bn",
"ca",
"code",
"en",
"es",
"eu",
"fon",
"fr",
"gu",
"hi",
"id",
"ig",
"ki",
"kn",
"lg",
"ln",
"ml",
"mr",
"ne",
"nso",
"ny",
"or",
"pa",
"pt",
"rn",
"rw",
"sn",
"st",
"sw",
"ta",
"te",
"tn",
"ts",
"tum",
"tw",
"ur",
"vi",
"wo",
"xh",
"yo",
"zh",
"zu",
"dataset:bigscience/xP3",
"arxiv:2211.01786",
"model-index",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-1.5b-quantized.w8a8",
"name": "Qwen2.5 1.5B Quantized.W8A8",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/RedHatAI/Qwen2.5-1.5B-quantized.w8a8",
"description": "Open source model RedHatAI/Qwen2.5-1.5B-quantized.w8a8. 2 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 2,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"safetensors",
"qwen2",
"chat",
"neuralmagic",
"llmcompressor",
"conversational",
"en",
"base_model:Qwen/Qwen2.5-1.5B",
"base_model:quantized:Qwen/Qwen2.5-1.5B",
"8-bit",
"compressed-tensors",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 5,
"parameters_active_b": 5,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "h2ovl-mississippi-2b",
"name": "H2Ovl Mississippi 2B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/h2oai/h2ovl-mississippi-2b",
"description": "Open source model h2oai/h2ovl-mississippi-2b. 40 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 40,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"h2ovl_chat",
"feature-extraction",
"gpt",
"llm",
"multimodal large language model",
"ocr",
"conversational",
"custom_code",
"en",
"arxiv:2410.13611",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 2,
"parameters_active_b": 2,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llava-v1.5-7b",
"name": "Llava V1.5 7B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/liuhaotian/llava-v1.5-7b",
"description": "Open source model liuhaotian/llava-v1.5-7b. 537 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 537,
"language": "Python",
"license": "unknown",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"llava",
"image-text-to-text",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "t5-3b",
"name": "T5 3B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/google-t5/t5-3b",
"description": "Open source model google-t5/t5-3b. 51 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 51,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"tf",
"safetensors",
"t5",
"summarization",
"translation",
"en",
"fr",
"ro",
"de",
"multilingual",
"dataset:c4",
"arxiv:1805.12471",
"arxiv:1708.00055",
"arxiv:1704.05426",
"arxiv:1606.05250",
"arxiv:1808.09121",
"arxiv:1810.12885",
"arxiv:1905.10044",
"arxiv:1910.09700",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 2,
"context_window_tokens": 4096,
"parameters_total_b": 3,
"parameters_active_b": 3,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-14b-instruct-awq",
"name": "Qwen2.5 14B Instruct Awq",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-AWQ",
"description": "Open source model Qwen/Qwen2.5-14B-Instruct-AWQ. 27 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 27,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"chat",
"conversational",
"en",
"arxiv:2309.00071",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-14B-Instruct",
"base_model:quantized:Qwen/Qwen2.5-14B-Instruct",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"awq",
"region:us"
],
"hardware_req": "16GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 10,
"context_window_tokens": 4096,
"parameters_total_b": 14,
"parameters_active_b": 14,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-3.2-3b",
"name": "Llama 3.2 3B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/meta-llama/Llama-3.2-3B",
"description": "Open source model meta-llama/Llama-3.2-3B. 697 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 697,
"language": "Python",
"license": "llama3.2",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"facebook",
"meta",
"pytorch",
"llama-3",
"en",
"de",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"arxiv:2204.05149",
"arxiv:2405.16406",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 2,
"context_window_tokens": 4096,
"parameters_total_b": 3,
"parameters_active_b": 3,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "phi-3-mini-4k-instruct-gptq-4bit",
"name": "Phi 3 Mini 4K Instruct Gptq 4Bit",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/kaitchup/Phi-3-mini-4k-instruct-gptq-4bit",
"description": "Open source model kaitchup/Phi-3-mini-4k-instruct-gptq-4bit. 2 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 2,
"language": "Python",
"license": "unknown",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"phi3",
"conversational",
"custom_code",
"arxiv:1910.09700",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"gptq",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 3,
"context_window_tokens": 4096,
"parameters_total_b": 4,
"parameters_active_b": 4,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-72b-instruct-awq",
"name": "Qwen2.5 72B Instruct Awq",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct-AWQ",
"description": "Open source model Qwen/Qwen2.5-72B-Instruct-AWQ. 74 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 74,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"chat",
"conversational",
"en",
"arxiv:2309.00071",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-72B-Instruct",
"base_model:quantized:Qwen/Qwen2.5-72B-Instruct",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"awq",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 50,
"context_window_tokens": 4096,
"parameters_total_b": 72,
"parameters_active_b": 72,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "smollm2-135m",
"name": "Smollm2 135M",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/HuggingFaceTB/SmolLM2-135M",
"description": "Open source model HuggingFaceTB/SmolLM2-135M. 166 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 166,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"en",
"arxiv:2502.02737",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-3.3-70b-instruct",
"name": "Llama 3.3 70B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct",
"description": "Open source model meta-llama/Llama-3.3-70B-Instruct. 2658 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 2658,
"language": "Python",
"license": "llama3.3",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"facebook",
"meta",
"pytorch",
"llama-3",
"conversational",
"en",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"de",
"arxiv:2204.05149",
"base_model:meta-llama/Llama-3.1-70B",
"base_model:finetune:meta-llama/Llama-3.1-70B",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 49,
"context_window_tokens": 4096,
"parameters_total_b": 70,
"parameters_active_b": 70,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-30b-a3b-instruct-2507-fp8",
"name": "Qwen3 30B A3B Instruct 2507 Fp8",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-30B-A3B-Instruct-2507-FP8",
"description": "Open source model Qwen/Qwen3-30B-A3B-Instruct-2507-FP8. 112 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 112,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3_moe",
"conversational",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-30B-A3B-Instruct-2507",
"base_model:quantized:Qwen/Qwen3-30B-A3B-Instruct-2507",
"endpoints_compatible",
"fp8",
"deploy:azure",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 21,
"context_window_tokens": 4096,
"parameters_total_b": 30,
"parameters_active_b": 30,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-coder-32b-instruct",
"name": "Qwen2.5 Coder 32B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct",
"description": "Open source model Qwen/Qwen2.5-Coder-32B-Instruct. 1995 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1995,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"code",
"codeqwen",
"chat",
"qwen",
"qwen-coder",
"conversational",
"en",
"arxiv:2409.12186",
"arxiv:2309.00071",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-Coder-32B",
"base_model:finetune:Qwen/Qwen2.5-Coder-32B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 22,
"context_window_tokens": 4096,
"parameters_total_b": 32,
"parameters_active_b": 32,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-235b-a22b-instruct-2507-fp8",
"name": "Qwen3 235B A22B Instruct 2507 Fp8",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-235B-A22B-Instruct-2507-FP8",
"description": "Open source model Qwen/Qwen3-235B-A22B-Instruct-2507-FP8. 145 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 145,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3_moe",
"conversational",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-235B-A22B-Instruct-2507",
"base_model:quantized:Qwen/Qwen3-235B-A22B-Instruct-2507",
"endpoints_compatible",
"fp8",
"deploy:azure",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 164,
"context_window_tokens": 4096,
"parameters_total_b": 235,
"parameters_active_b": 235,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "deepseek-r1-distill-qwen-7b",
"name": "Deepseek R1 Distill Qwen 7B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
"description": "Open source model deepseek-ai/DeepSeek-R1-Distill-Qwen-7B. 787 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 787,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"conversational",
"arxiv:2501.12948",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "phi-3-mini-4k-instruct",
"name": "Phi 3 Mini 4K Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct",
"description": "Open source model microsoft/Phi-3-mini-4k-instruct. 1386 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1386,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"phi3",
"nlp",
"code",
"conversational",
"custom_code",
"en",
"fr",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-14b",
"name": "Qwen3 14B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-14B",
"description": "Open source model Qwen/Qwen3-14B. 366 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 366,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2309.00071",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-14B-Base",
"base_model:finetune:Qwen/Qwen3-14B-Base",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "16GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 10,
"context_window_tokens": 4096,
"parameters_total_b": 14,
"parameters_active_b": 14,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-coder-1.5b",
"name": "Qwen2.5 Coder 1.5B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B",
"description": "Open source model Qwen/Qwen2.5-Coder-1.5B. 81 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 81,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"code",
"qwen",
"qwen-coder",
"codeqwen",
"conversational",
"en",
"arxiv:2409.12186",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-1.5B",
"base_model:finetune:Qwen/Qwen2.5-1.5B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 5,
"parameters_active_b": 5,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-3.1-70b-instruct",
"name": "Llama 3.1 70B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct",
"description": "Open source model meta-llama/Llama-3.1-70B-Instruct. 890 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 890,
"language": "Python",
"license": "llama3.1",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"facebook",
"meta",
"pytorch",
"llama-3",
"conversational",
"en",
"de",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"arxiv:2204.05149",
"base_model:meta-llama/Llama-3.1-70B",
"base_model:finetune:meta-llama/Llama-3.1-70B",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 49,
"context_window_tokens": 4096,
"parameters_total_b": 70,
"parameters_active_b": 70,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "hunyuanimage-3.0",
"name": "Hunyuanimage 3.0",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/tencent/HunyuanImage-3.0",
"description": "Open source model tencent/HunyuanImage-3.0. 640 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 640,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"hunyuan_image_3_moe",
"text-to-image",
"custom_code",
"arxiv:2509.23951",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-coder-7b-instruct-awq",
"name": "Qwen2.5 Coder 7B Instruct Awq",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct-AWQ",
"description": "Open source model Qwen/Qwen2.5-Coder-7B-Instruct-AWQ. 19 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 19,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"code",
"codeqwen",
"chat",
"qwen",
"qwen-coder",
"conversational",
"en",
"arxiv:2409.12186",
"arxiv:2309.00071",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-Coder-7B-Instruct",
"base_model:quantized:Qwen/Qwen2.5-Coder-7B-Instruct",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"awq",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-coder-30b-a3b-instruct",
"name": "Qwen3 Coder 30B A3B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-Coder-30B-A3B-Instruct",
"description": "Open source model Qwen/Qwen3-Coder-30B-A3B-Instruct. 945 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 945,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3_moe",
"conversational",
"arxiv:2505.09388",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 21,
"context_window_tokens": 4096,
"parameters_total_b": 30,
"parameters_active_b": 30,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "deepseek-r1-0528",
"name": "Deepseek R1 0528",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
"description": "Open source model deepseek-ai/DeepSeek-R1-0528. 2400 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 2400,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"deepseek_v3",
"conversational",
"custom_code",
"arxiv:2501.12948",
"eval-results",
"text-generation-inference",
"endpoints_compatible",
"fp8",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "tiny-random-llama-3",
"name": "Tiny Random Llama 3",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/llamafactory/tiny-random-Llama-3",
"description": "Open source model llamafactory/tiny-random-Llama-3. 3 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 3,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"conversational",
"text-generation-inference",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-coder-32b-instruct-awq",
"name": "Qwen2.5 Coder 32B Instruct Awq",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct-AWQ",
"description": "Open source model Qwen/Qwen2.5-Coder-32B-Instruct-AWQ. 33 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 33,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"code",
"codeqwen",
"chat",
"qwen",
"qwen-coder",
"conversational",
"en",
"arxiv:2409.12186",
"arxiv:2309.00071",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-Coder-32B-Instruct",
"base_model:quantized:Qwen/Qwen2.5-Coder-32B-Instruct",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"awq",
"deploy:azure",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 22,
"context_window_tokens": 4096,
"parameters_total_b": 32,
"parameters_active_b": 32,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "mistral-7b-instruct-v0.1",
"name": "Mistral 7B Instruct V0.1",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1",
"description": "Open source model mistralai/Mistral-7B-Instruct-v0.1. 1826 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1826,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"safetensors",
"mistral",
"finetuned",
"mistral-common",
"conversational",
"arxiv:2310.06825",
"base_model:mistralai/Mistral-7B-v0.1",
"base_model:finetune:mistralai/Mistral-7B-v0.1",
"text-generation-inference",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "gpt-oss-20b-mxfp4-q8",
"name": "Gpt Oss 20B Mxfp4 Q8",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/mlx-community/gpt-oss-20b-MXFP4-Q8",
"description": "Open source model mlx-community/gpt-oss-20b-MXFP4-Q8. 31 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 31,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"mlx",
"safetensors",
"gpt_oss",
"vllm",
"conversational",
"base_model:openai/gpt-oss-20b",
"base_model:quantized:openai/gpt-oss-20b",
"4-bit",
"region:us"
],
"hardware_req": "16GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 14,
"context_window_tokens": 4096,
"parameters_total_b": 20,
"parameters_active_b": 20,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-embedding-4b",
"name": "Qwen3 Embedding 4B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-Embedding-4B",
"description": "Open source model Qwen/Qwen3-Embedding-4B. 224 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 224,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"sentence-transformers",
"safetensors",
"qwen3",
"transformers",
"sentence-similarity",
"feature-extraction",
"text-embeddings-inference",
"arxiv:2506.05176",
"base_model:Qwen/Qwen3-4B-Base",
"base_model:finetune:Qwen/Qwen3-4B-Base",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 3,
"context_window_tokens": 4096,
"parameters_total_b": 4,
"parameters_active_b": 4,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-1.5b-instruct-awq",
"name": "Qwen2.5 1.5B Instruct Awq",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-AWQ",
"description": "Open source model Qwen/Qwen2.5-1.5B-Instruct-AWQ. 6 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 6,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"chat",
"conversational",
"en",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-1.5B-Instruct",
"base_model:quantized:Qwen/Qwen2.5-1.5B-Instruct",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"awq",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 5,
"parameters_active_b": 5,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "meta-llama-3.1-8b-instruct-fp8",
"name": "Meta Llama 3.1 8B Instruct Fp8",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8",
"description": "Open source model RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8. 44 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 44,
"language": "Python",
"license": "llama3.1",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"fp8",
"vllm",
"conversational",
"en",
"de",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"base_model:meta-llama/Llama-3.1-8B-Instruct",
"base_model:quantized:meta-llama/Llama-3.1-8B-Instruct",
"text-generation-inference",
"endpoints_compatible",
"compressed-tensors",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "phi-4",
"name": "Phi 4",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/microsoft/phi-4",
"description": "Open source model microsoft/phi-4. 2220 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 2220,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"phi3",
"phi",
"nlp",
"math",
"code",
"chat",
"conversational",
"en",
"arxiv:2412.08905",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "deepseek-r1",
"name": "Deepseek R1",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/deepseek-ai/DeepSeek-R1",
"description": "Open source model deepseek-ai/DeepSeek-R1. 13011 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 13011,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"deepseek_v3",
"conversational",
"custom_code",
"arxiv:2501.12948",
"eval-results",
"text-generation-inference",
"endpoints_compatible",
"fp8",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-3.2-1b-instruct-fp8",
"name": "Llama 3.2 1B Instruct Fp8",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/RedHatAI/Llama-3.2-1B-Instruct-FP8",
"description": "Open source model RedHatAI/Llama-3.2-1B-Instruct-FP8. 3 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 3,
"language": "Python",
"license": "llama3.2",
"tags": [
"AI",
"LLM",
"safetensors",
"llama",
"llama-3",
"neuralmagic",
"llmcompressor",
"conversational",
"en",
"de",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"base_model:meta-llama/Llama-3.2-1B-Instruct",
"base_model:quantized:meta-llama/Llama-3.2-1B-Instruct",
"compressed-tensors",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 1,
"parameters_active_b": 1,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-3.1-405b",
"name": "Llama 3.1 405B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/meta-llama/Llama-3.1-405B",
"description": "Open source model meta-llama/Llama-3.1-405B. 961 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 961,
"language": "Python",
"license": "llama3.1",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"facebook",
"meta",
"pytorch",
"llama-3",
"en",
"de",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"arxiv:2204.05149",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 284,
"context_window_tokens": 4096,
"parameters_total_b": 405,
"parameters_active_b": 405,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-4b-thinking-2507",
"name": "Qwen3 4B Thinking 2507",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-4B-Thinking-2507",
"description": "Open source model Qwen/Qwen3-4B-Thinking-2507. 548 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 548,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2505.09388",
"eval-results",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 3,
"context_window_tokens": 4096,
"parameters_total_b": 4,
"parameters_active_b": 4,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "gpt2-medium",
"name": "Gpt2 Medium",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/openai-community/gpt2-medium",
"description": "Open source model openai-community/gpt2-medium. 193 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 193,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"tf",
"jax",
"rust",
"onnx",
"safetensors",
"gpt2",
"en",
"arxiv:1910.09700",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "tiny-gpt2",
"name": "Tiny Gpt2",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/sshleifer/tiny-gpt2",
"description": "Open source model sshleifer/tiny-gpt2. 34 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 34,
"language": "Python",
"license": "unknown",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"tf",
"jax",
"gpt2",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "hermes-3-llama-3.1-8b",
"name": "Hermes 3 Llama 3.1 8B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B",
"description": "Open source model NousResearch/Hermes-3-Llama-3.1-8B. 385 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 385,
"language": "Python",
"license": "llama3",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"Llama-3",
"instruct",
"finetune",
"chatml",
"gpt4",
"synthetic data",
"distillation",
"function calling",
"json mode",
"axolotl",
"roleplaying",
"chat",
"conversational",
"en",
"arxiv:2408.11857",
"base_model:meta-llama/Llama-3.1-8B",
"base_model:finetune:meta-llama/Llama-3.1-8B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "phi-3.5-vision-instruct",
"name": "Phi 3.5 Vision Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/microsoft/Phi-3.5-vision-instruct",
"description": "Open source model microsoft/Phi-3.5-vision-instruct. 726 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 726,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"phi3_v",
"nlp",
"code",
"vision",
"image-text-to-text",
"conversational",
"custom_code",
"multilingual",
"arxiv:2404.14219",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": true
},
"referral_url": ""
},
{
"slug": "minimax-m2",
"name": "Minimax M2",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/MiniMaxAI/MiniMax-M2",
"description": "Open source model MiniMaxAI/MiniMax-M2. 1485 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1485,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"minimax_m2",
"conversational",
"custom_code",
"arxiv:2504.07164",
"arxiv:2509.06501",
"arxiv:2509.13160",
"eval-results",
"endpoints_compatible",
"fp8",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "deepseek-r1-distill-llama-8b",
"name": "Deepseek R1 Distill Llama 8B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
"description": "Open source model deepseek-ai/DeepSeek-R1-Distill-Llama-8B. 843 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 843,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"conversational",
"arxiv:2501.12948",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-14b-awq",
"name": "Qwen3 14B Awq",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-14B-AWQ",
"description": "Open source model Qwen/Qwen3-14B-AWQ. 57 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 57,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2309.00071",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-14B",
"base_model:quantized:Qwen/Qwen3-14B",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"awq",
"region:us"
],
"hardware_req": "16GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 10,
"context_window_tokens": 4096,
"parameters_total_b": 14,
"parameters_active_b": 14,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-235b-a22b",
"name": "Qwen3 235B A22B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-235B-A22B",
"description": "Open source model Qwen/Qwen3-235B-A22B. 1075 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1075,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3_moe",
"conversational",
"arxiv:2309.00071",
"arxiv:2505.09388",
"endpoints_compatible",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 164,
"context_window_tokens": 4096,
"parameters_total_b": 235,
"parameters_active_b": 235,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "meta-llama-3.1-8b-instruct-awq-int4",
"name": "Meta Llama 3.1 8B Instruct Awq Int4",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4",
"description": "Open source model hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4. 87 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 87,
"language": "Python",
"license": "llama3.1",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"llama-3.1",
"meta",
"autoawq",
"conversational",
"en",
"de",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"awq",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "lfm2.5-1.2b-instruct-mlx-8bit",
"name": "Lfm2.5 1.2B Instruct Mlx 8Bit",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/lmstudio-community/LFM2.5-1.2B-Instruct-MLX-8bit",
    "description": "Open source model lmstudio-community/LFM2.5-1.2B-Instruct-MLX-8bit. 1 like on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"lfm2",
"liquid",
"lfm2.5",
"edge",
"mlx",
"conversational",
"en",
"ar",
"zh",
"fr",
"de",
"ja",
"ko",
"es",
"base_model:LiquidAI/LFM2.5-1.2B-Instruct",
"base_model:quantized:LiquidAI/LFM2.5-1.2B-Instruct",
"endpoints_compatible",
"8-bit",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 2,
"parameters_active_b": 2,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "glm-4.7-flash-gguf",
"name": "Glm 4.7 Flash Gguf",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/unsloth/GLM-4.7-Flash-GGUF",
"description": "Open source model unsloth/GLM-4.7-Flash-GGUF. 482 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 482,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"gguf",
"unsloth",
"en",
"zh",
"arxiv:2508.06471",
"base_model:zai-org/GLM-4.7-Flash",
"base_model:quantized:zai-org/GLM-4.7-Flash",
"endpoints_compatible",
"deploy:azure",
"region:us",
"imatrix",
"conversational"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "deepseek-r1-distill-qwen-14b",
"name": "Deepseek R1 Distill Qwen 14B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
"description": "Open source model deepseek-ai/DeepSeek-R1-Distill-Qwen-14B. 603 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 603,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"conversational",
"arxiv:2501.12948",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "16GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 10,
"context_window_tokens": 4096,
"parameters_total_b": 14,
"parameters_active_b": 14,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "lfm2.5-1.2b-instruct-mlx-6bit",
"name": "Lfm2.5 1.2B Instruct Mlx 6Bit",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/lmstudio-community/LFM2.5-1.2B-Instruct-MLX-6bit",
"description": "Open source model lmstudio-community/LFM2.5-1.2B-Instruct-MLX-6bit. 4 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 4,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"lfm2",
"liquid",
"lfm2.5",
"edge",
"mlx",
"conversational",
"en",
"ar",
"zh",
"fr",
"de",
"ja",
"ko",
"es",
"base_model:LiquidAI/LFM2.5-1.2B-Instruct",
"base_model:quantized:LiquidAI/LFM2.5-1.2B-Instruct",
"endpoints_compatible",
"6-bit",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 2,
"parameters_active_b": 2,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "lfm2.5-1.2b-instruct-mlx-4bit",
"name": "Lfm2.5 1.2B Instruct Mlx 4Bit",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/lmstudio-community/LFM2.5-1.2B-Instruct-MLX-4bit",
"description": "Open source model lmstudio-community/LFM2.5-1.2B-Instruct-MLX-4bit. 1 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"lfm2",
"liquid",
"lfm2.5",
"edge",
"mlx",
"conversational",
"en",
"ar",
"zh",
"fr",
"de",
"ja",
"ko",
"es",
"base_model:LiquidAI/LFM2.5-1.2B-Instruct",
"base_model:quantized:LiquidAI/LFM2.5-1.2B-Instruct",
"endpoints_compatible",
"4-bit",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 2,
"parameters_active_b": 2,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "vicuna-7b-v1.5",
"name": "Vicuna 7B V1.5",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/lmsys/vicuna-7b-v1.5",
"description": "Open source model lmsys/vicuna-7b-v1.5. 387 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 387,
"language": "Python",
"license": "llama2",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"llama",
"arxiv:2307.09288",
"arxiv:2306.05685",
"text-generation-inference",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-3.2-1b-instruct-q8_0-gguf",
"name": "Llama 3.2 1B Instruct Q8_0 Gguf",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/hugging-quants/Llama-3.2-1B-Instruct-Q8_0-GGUF",
"description": "Open source model hugging-quants/Llama-3.2-1B-Instruct-Q8_0-GGUF. 43 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 43,
"language": "Python",
"license": "unknown",
"tags": [
"AI",
"LLM",
"gguf",
"facebook",
"meta",
"pytorch",
"llama",
"llama-3",
"llama-cpp",
"gguf-my-repo",
"en",
"de",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"base_model:meta-llama/Llama-3.2-1B-Instruct",
"base_model:quantized:meta-llama/Llama-3.2-1B-Instruct",
"endpoints_compatible",
"region:us",
"conversational"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 1,
"parameters_active_b": 1,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-3.3-70b-instruct-awq",
"name": "Llama 3.3 70B Instruct Awq",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/kosbu/Llama-3.3-70B-Instruct-AWQ",
"description": "Open source model kosbu/Llama-3.3-70B-Instruct-AWQ. 10 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 10,
"language": "Python",
"license": "llama3.3",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"facebook",
"meta",
"llama-3",
"awq",
"conversational",
"en",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"de",
"base_model:meta-llama/Llama-3.3-70B-Instruct",
"base_model:quantized:meta-llama/Llama-3.3-70B-Instruct",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"deploy:azure",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 49,
"context_window_tokens": 4096,
"parameters_total_b": 70,
"parameters_active_b": 70,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-32b-fp8",
"name": "Qwen3 32B Fp8",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-32B-FP8",
"description": "Open source model Qwen/Qwen3-32B-FP8. 80 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 80,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2309.00071",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-32B",
"base_model:quantized:Qwen/Qwen3-32B",
"text-generation-inference",
"endpoints_compatible",
"fp8",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 22,
"context_window_tokens": 4096,
"parameters_total_b": 32,
"parameters_active_b": 32,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "gpt2-xl",
"name": "Gpt2 Xl",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/openai-community/gpt2-xl",
"description": "Open source model openai-community/gpt2-xl. 373 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 373,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"tf",
"jax",
"rust",
"safetensors",
"gpt2",
"en",
"arxiv:1910.09700",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-4b-instruct-2507-fp8",
"name": "Qwen3 4B Instruct 2507 Fp8",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507-FP8",
"description": "Open source model Qwen/Qwen3-4B-Instruct-2507-FP8. 65 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 65,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-4B-Instruct-2507",
"base_model:quantized:Qwen/Qwen3-4B-Instruct-2507",
"text-generation-inference",
"endpoints_compatible",
"fp8",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 3,
"context_window_tokens": 4096,
"parameters_total_b": 4,
"parameters_active_b": 4,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "xlnet-base-cased",
"name": "Xlnet Base Cased",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/xlnet/xlnet-base-cased",
"description": "Open source model xlnet/xlnet-base-cased. 80 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 80,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"tf",
"rust",
"xlnet",
"en",
"dataset:bookcorpus",
"dataset:wikipedia",
"arxiv:1906.08237",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-2-7b-hf",
"name": "Llama 2 7B Hf",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/meta-llama/Llama-2-7b-hf",
"description": "Open source model meta-llama/Llama-2-7b-hf. 2268 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 2268,
"language": "Python",
"license": "llama2",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"safetensors",
"llama",
"facebook",
"meta",
"llama-2",
"en",
"arxiv:2307.09288",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-math-7b-instruct",
"name": "Qwen2.5 Math 7B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-Math-7B-Instruct",
"description": "Open source model Qwen/Qwen2.5-Math-7B-Instruct. 89 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 89,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"chat",
"conversational",
"en",
"arxiv:2409.12122",
"base_model:Qwen/Qwen2.5-Math-7B",
"base_model:finetune:Qwen/Qwen2.5-Math-7B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-reranker-0.6b",
"name": "Qwen3 Reranker 0.6B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-Reranker-0.6B",
"description": "Open source model Qwen/Qwen3-Reranker-0.6B. 305 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 305,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"text-ranking",
"arxiv:2506.05176",
"base_model:Qwen/Qwen3-0.6B-Base",
"base_model:finetune:Qwen/Qwen3-0.6B-Base",
"text-embeddings-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 6,
"parameters_active_b": 6,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-1.5b",
"name": "Qwen2.5 1.5B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-1.5B",
"description": "Open source model Qwen/Qwen2.5-1.5B. 165 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 165,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"conversational",
"en",
"arxiv:2407.10671",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 5,
"parameters_active_b": 5,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-30b-a3b-thinking-2507",
"name": "Qwen3 30B A3B Thinking 2507",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-30B-A3B-Thinking-2507",
"description": "Open source model Qwen/Qwen3-30B-A3B-Thinking-2507. 359 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 359,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3_moe",
"conversational",
"arxiv:2402.17463",
"arxiv:2407.02490",
"arxiv:2501.15383",
"arxiv:2404.06654",
"arxiv:2505.09388",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 21,
"context_window_tokens": 4096,
"parameters_total_b": 30,
"parameters_active_b": 30,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "smollm2-135m-instruct",
"name": "Smollm2 135M Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/HuggingFaceTB/SmolLM2-135M-Instruct",
"description": "Open source model HuggingFaceTB/SmolLM2-135M-Instruct. 292 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 292,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"tensorboard",
"onnx",
"safetensors",
"llama",
"transformers.js",
"conversational",
"en",
"arxiv:2502.02737",
"base_model:HuggingFaceTB/SmolLM2-135M",
"base_model:quantized:HuggingFaceTB/SmolLM2-135M",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-math-1.5b",
"name": "Qwen2.5 Math 1.5B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-Math-1.5B",
"description": "Open source model Qwen/Qwen2.5-Math-1.5B. 100 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 100,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"conversational",
"en",
"arxiv:2409.12122",
"base_model:Qwen/Qwen2.5-1.5B",
"base_model:finetune:Qwen/Qwen2.5-1.5B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 5,
"parameters_active_b": 5,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "glm-4.5-air-awq-4bit",
"name": "Glm 4.5 Air Awq 4Bit",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/cyankiwi/GLM-4.5-Air-AWQ-4bit",
"description": "Open source model cyankiwi/GLM-4.5-Air-AWQ-4bit. 27 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 27,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"glm4_moe",
"conversational",
"en",
"zh",
"arxiv:2508.06471",
"base_model:zai-org/GLM-4.5-Air",
"base_model:quantized:zai-org/GLM-4.5-Air",
"endpoints_compatible",
"compressed-tensors",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 3,
"context_window_tokens": 4096,
"parameters_total_b": 4,
"parameters_active_b": 4,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-2-7b-chat-hf",
"name": "Llama 2 7B Chat Hf",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/meta-llama/Llama-2-7b-chat-hf",
"description": "Open source model meta-llama/Llama-2-7b-chat-hf. 4705 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 4705,
"language": "Python",
"license": "llama2",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"safetensors",
"llama",
"facebook",
"meta",
"llama-2",
"conversational",
"en",
"arxiv:2307.09288",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-coder-7b-instruct-gptq-int4",
"name": "Qwen2.5 Coder 7B Instruct Gptq Int4",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-Int4",
"description": "Open source model Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-Int4. 12 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 12,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"code",
"codeqwen",
"chat",
"qwen",
"qwen-coder",
"conversational",
"en",
"arxiv:2409.12186",
"arxiv:2309.00071",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-Coder-7B-Instruct",
"base_model:quantized:Qwen/Qwen2.5-Coder-7B-Instruct",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"gptq",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-vl-30b-a3b-instruct-awq",
"name": "Qwen3 Vl 30B A3B Instruct Awq",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/QuantTrio/Qwen3-VL-30B-A3B-Instruct-AWQ",
"description": "Open source model QuantTrio/Qwen3-VL-30B-A3B-Instruct-AWQ. 38 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 38,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3_vl_moe",
"image-text-to-text",
"AWQ",
"vLLM",
"conversational",
"arxiv:2505.09388",
"arxiv:2502.13923",
"arxiv:2409.12191",
"arxiv:2308.12966",
"base_model:Qwen/Qwen3-VL-30B-A3B-Instruct",
"base_model:quantized:Qwen/Qwen3-VL-30B-A3B-Instruct",
"endpoints_compatible",
"4-bit",
"awq",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 21,
"context_window_tokens": 4096,
"parameters_total_b": 30,
"parameters_active_b": 30,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-8b-base",
"name": "Qwen3 8B Base",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-8B-Base",
"description": "Open source model Qwen/Qwen3-8B-Base. 82 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 82,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2505.09388",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-coder-14b-instruct",
"name": "Qwen2.5 Coder 14B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct",
"description": "Open source model Qwen/Qwen2.5-Coder-14B-Instruct. 140 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 140,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"code",
"codeqwen",
"chat",
"qwen",
"qwen-coder",
"conversational",
"en",
"arxiv:2409.12186",
"arxiv:2309.00071",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-Coder-14B",
"base_model:finetune:Qwen/Qwen2.5-Coder-14B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "16GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 10,
"context_window_tokens": 4096,
"parameters_total_b": 14,
"parameters_active_b": 14,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "stories15m_moe",
"name": "Stories15M_Moe",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/ggml-org/stories15M_MOE",
"description": "Open source model ggml-org/stories15M_MOE. 5 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 5,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"gguf",
"mixtral",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "opt-1.3b",
"name": "Opt 1.3B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/facebook/opt-1.3b",
"description": "Open source model facebook/opt-1.3b. 182 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 182,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"tf",
"jax",
"opt",
"en",
"arxiv:2205.01068",
"arxiv:2005.14165",
"text-generation-inference",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 2,
"context_window_tokens": 4096,
"parameters_total_b": 3,
"parameters_active_b": 3,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "minimax-m2-awq",
"name": "Minimax M2 Awq",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/QuantTrio/MiniMax-M2-AWQ",
"description": "Open source model QuantTrio/MiniMax-M2-AWQ. 8 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 8,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"mixtral",
"vLLM",
"AWQ",
"conversational",
"arxiv:2504.07164",
"arxiv:2509.06501",
"arxiv:2509.13160",
"base_model:MiniMaxAI/MiniMax-M2",
"base_model:quantized:MiniMaxAI/MiniMax-M2",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"awq",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "glm-4.7-flash-nvfp4",
"name": "Glm 4.7 Flash Nvfp4",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/GadflyII/GLM-4.7-Flash-NVFP4",
"description": "Open source model GadflyII/GLM-4.7-Flash-NVFP4. 62 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 62,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"glm4_moe_lite",
"moe",
"nvfp4",
"quantized",
"vllm",
"glm",
"30b",
"conversational",
"en",
"zh",
"base_model:zai-org/GLM-4.7-Flash",
"base_model:quantized:zai-org/GLM-4.7-Flash",
"endpoints_compatible",
"compressed-tensors",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "hy-mt1.5-7b",
"name": "Hy Mt1.5 7B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/tencent/HY-MT1.5-7B",
"description": "Open source model tencent/HY-MT1.5-7B. 133 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 133,
"language": "Python",
"license": "unknown",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"hunyuan_v1_dense",
"translation",
"zh",
"en",
"fr",
"pt",
"es",
"ja",
"tr",
"ru",
"ar",
"ko",
"th",
"it",
"de",
"vi",
"ms",
"id",
"tl",
"hi",
"pl",
"cs",
"nl",
"km",
"my",
"fa",
"gu",
"ur",
"te",
"mr",
"he",
"bn",
"ta",
"uk",
"bo",
"kk",
"mn",
"ug",
"arxiv:2512.24092",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "gemma-2-27b-it",
"name": "Gemma 2 27B It",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/google/gemma-2-27b-it",
"description": "Open source model google/gemma-2-27b-it. 559 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 559,
"language": "Python",
"license": "gemma",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"gemma2",
"conversational",
"arxiv:2009.03300",
"arxiv:1905.07830",
"arxiv:1911.11641",
"arxiv:1904.09728",
"arxiv:1905.10044",
"arxiv:1907.10641",
"arxiv:1811.00937",
"arxiv:1809.02789",
"arxiv:1911.01547",
"arxiv:1705.03551",
"arxiv:2107.03374",
"arxiv:2108.07732",
"arxiv:2110.14168",
"arxiv:2009.11462",
"arxiv:2101.11718",
"arxiv:2110.08193",
"arxiv:1804.09301",
"arxiv:2109.07958",
"arxiv:1804.06876",
"arxiv:2103.03874",
"arxiv:2304.06364",
"arxiv:2206.04615",
"arxiv:2203.09509",
"base_model:google/gemma-2-27b",
"base_model:finetune:google/gemma-2-27b",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 19,
"context_window_tokens": 4096,
"parameters_total_b": 27,
"parameters_active_b": 27,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-coder-next-gguf",
"name": "Qwen3 Coder Next Gguf",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/unsloth/Qwen3-Coder-Next-GGUF",
"description": "Open source model unsloth/Qwen3-Coder-Next-GGUF. 347 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 347,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"gguf",
"qwen3_next",
"unsloth",
"qwen",
"qwen3",
"base_model:Qwen/Qwen3-Coder-Next",
"base_model:quantized:Qwen/Qwen3-Coder-Next",
"endpoints_compatible",
"region:us",
"imatrix",
"conversational"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "gte-qwen2-1.5b-instruct",
"name": "Gte Qwen2 1.5B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct",
"description": "Open source model Alibaba-NLP/gte-Qwen2-1.5B-instruct. 229 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 229,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"sentence-transformers",
"safetensors",
"qwen2",
"mteb",
"transformers",
"Qwen2",
"sentence-similarity",
"custom_code",
"arxiv:2308.03281",
"model-index",
"text-embeddings-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 5,
"parameters_active_b": 5,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "lfm2-1.2b",
"name": "Lfm2 1.2B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/LiquidAI/LFM2-1.2B",
"description": "Open source model LiquidAI/LFM2-1.2B. 349 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 349,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"lfm2",
"liquid",
"edge",
"conversational",
"en",
"ar",
"zh",
"fr",
"de",
"ja",
"ko",
"es",
"arxiv:2511.23404",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 2,
"parameters_active_b": 2,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "saiga_llama3_8b",
"name": "Saiga_Llama3_8B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/IlyaGusev/saiga_llama3_8b",
"description": "Open source model IlyaGusev/saiga_llama3_8b. 137 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 137,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"conversational",
"ru",
"dataset:IlyaGusev/saiga_scored",
"doi:10.57967/hf/2368",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-1.7b-base",
"name": "Qwen3 1.7B Base",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-1.7B-Base",
"description": "Open source model Qwen/Qwen3-1.7B-Base. 62 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 62,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2505.09388",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "mistral-7b-v0.3-bnb-4bit",
"name": "Mistral 7B V0.3 Bnb 4Bit",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/unsloth/mistral-7b-v0.3-bnb-4bit",
"description": "Open source model unsloth/mistral-7b-v0.3-bnb-4bit. 22 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 22,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"mistral",
"unsloth",
"mistral-7b",
"en",
"base_model:mistralai/Mistral-7B-v0.3",
"base_model:quantized:mistralai/Mistral-7B-v0.3",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"bitsandbytes",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "gemma-2-2b-it",
"name": "Gemma 2 2B It",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/google/gemma-2-2b-it",
"description": "Open source model google/gemma-2-2b-it. 1285 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1285,
"language": "Python",
"license": "gemma",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"gemma2",
"conversational",
"arxiv:2009.03300",
"arxiv:1905.07830",
"arxiv:1911.11641",
"arxiv:1904.09728",
"arxiv:1905.10044",
"arxiv:1907.10641",
"arxiv:1811.00937",
"arxiv:1809.02789",
"arxiv:1911.01547",
"arxiv:1705.03551",
"arxiv:2107.03374",
"arxiv:2108.07732",
"arxiv:2110.14168",
"arxiv:2009.11462",
"arxiv:2101.11718",
"arxiv:2110.08193",
"arxiv:1804.09301",
"arxiv:2109.07958",
"arxiv:1804.06876",
"arxiv:2103.03874",
"arxiv:2304.06364",
"arxiv:1903.00161",
"arxiv:2206.04615",
"arxiv:2203.09509",
"arxiv:2403.13793",
"base_model:google/gemma-2-2b",
"base_model:finetune:google/gemma-2-2b",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 2,
"parameters_active_b": 2,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "phi-4-multimodal-instruct",
"name": "Phi 4 Multimodal Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/microsoft/Phi-4-multimodal-instruct",
"description": "Open source model microsoft/Phi-4-multimodal-instruct. 1573 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1573,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"phi4mm",
"nlp",
"code",
"audio",
"automatic-speech-recognition",
"speech-summarization",
"speech-translation",
"visual-question-answering",
"phi-4-multimodal",
"phi",
"phi-4-mini",
"custom_code",
"multilingual",
"ar",
"zh",
"cs",
"da",
"nl",
"en",
"fi",
"fr",
"de",
"he",
"hu",
"it",
"ja",
"ko",
"no",
"pl",
"pt",
"ru",
"es",
"sv",
"th",
"tr",
"uk",
"arxiv:2503.01743",
"arxiv:2407.13833",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "pythia-70m-deduped",
"name": "Pythia 70M Deduped",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/EleutherAI/pythia-70m-deduped",
"description": "Open source model EleutherAI/pythia-70m-deduped. 27 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 27,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"safetensors",
"gpt_neox",
"causal-lm",
"pythia",
"en",
"dataset:EleutherAI/the_pile_deduplicated",
"arxiv:2304.01373",
"arxiv:2101.00027",
"arxiv:2201.07311",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "dialogpt-medium",
"name": "Dialogpt Medium",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/microsoft/DialoGPT-medium",
"description": "Open source model microsoft/DialoGPT-medium. 433 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 433,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"tf",
"jax",
"rust",
"gpt2",
"conversational",
"arxiv:1911.00536",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "gpt-oss-20b-bf16",
"name": "Gpt Oss 20B Bf16",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/unsloth/gpt-oss-20b-BF16",
"description": "Open source model unsloth/gpt-oss-20b-BF16. 29 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 29,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"gpt_oss",
"vllm",
"unsloth",
"conversational",
"base_model:openai/gpt-oss-20b",
"base_model:finetune:openai/gpt-oss-20b",
"endpoints_compatible",
"region:us"
],
"hardware_req": "16GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 14,
"context_window_tokens": 4096,
"parameters_total_b": 20,
"parameters_active_b": 20,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-72b-instruct",
"name": "Qwen2.5 72B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
"description": "Open source model Qwen/Qwen2.5-72B-Instruct. 910 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 910,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"chat",
"conversational",
"en",
"arxiv:2309.00071",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-72B",
"base_model:finetune:Qwen/Qwen2.5-72B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 50,
"context_window_tokens": 4096,
"parameters_total_b": 72,
"parameters_active_b": 72,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-32b-awq",
"name": "Qwen3 32B Awq",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-32B-AWQ",
"description": "Open source model Qwen/Qwen3-32B-AWQ. 125 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 125,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2309.00071",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-32B",
"base_model:quantized:Qwen/Qwen3-32B",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"awq",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 22,
"context_window_tokens": 4096,
"parameters_total_b": 32,
"parameters_active_b": 32,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "mimo-v2-flash",
"name": "Mimo V2 Flash",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/XiaomiMiMo/MiMo-V2-Flash",
"description": "Open source model XiaomiMiMo/MiMo-V2-Flash. 628 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 628,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"mimo_v2_flash",
"conversational",
"custom_code",
"eval-results",
"fp8",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-coder-30b-a3b-instruct-fp8",
"name": "Qwen3 Coder 30B A3B Instruct Fp8",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8",
"description": "Open source model Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8. 158 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 158,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3_moe",
"conversational",
"arxiv:2505.09388",
"endpoints_compatible",
"fp8",
"deploy:azure",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 21,
"context_window_tokens": 4096,
"parameters_total_b": 30,
"parameters_active_b": 30,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-8b-fp8",
"name": "Qwen3 8B Fp8",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-8B-FP8",
"description": "Open source model Qwen/Qwen3-8B-FP8. 56 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 56,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2309.00071",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-8B",
"base_model:quantized:Qwen/Qwen3-8B",
"text-generation-inference",
"endpoints_compatible",
"fp8",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "deepseek-v3.2",
"name": "Deepseek V3.2",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/deepseek-ai/DeepSeek-V3.2",
"description": "Open source model deepseek-ai/DeepSeek-V3.2. 1251 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1251,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"deepseek_v32",
"conversational",
"base_model:deepseek-ai/DeepSeek-V3.2-Exp-Base",
"base_model:finetune:deepseek-ai/DeepSeek-V3.2-Exp-Base",
"eval-results",
"endpoints_compatible",
"fp8",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-coder-next",
"name": "Qwen3 Coder Next",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-Coder-Next",
"description": "Open source model Qwen/Qwen3-Coder-Next. 912 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 912,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3_next",
"conversational",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2-0.5b",
"name": "Qwen2 0.5B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2-0.5B",
"description": "Open source model Qwen/Qwen2-0.5B. 164 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 164,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"pretrained",
"conversational",
"en",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 5,
"parameters_active_b": 5,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "mistral-7b-v0.1",
"name": "Mistral 7B V0.1",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/mistralai/Mistral-7B-v0.1",
"description": "Open source model mistralai/Mistral-7B-v0.1. 4042 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 4042,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"safetensors",
"mistral",
"pretrained",
"mistral-common",
"en",
"arxiv:2310.06825",
"text-generation-inference",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "kimi-k2-thinking",
"name": "Kimi K2 Thinking",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/moonshotai/Kimi-K2-Thinking",
"description": "Open source model moonshotai/Kimi-K2-Thinking. 1670 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1670,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"kimi_k2",
"conversational",
"custom_code",
"eval-results",
"endpoints_compatible",
"compressed-tensors",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "deepseek-r1-0528-qwen3-8b-mlx-4bit",
"name": "Deepseek R1 0528 Qwen3 8B Mlx 4Bit",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/lmstudio-community/DeepSeek-R1-0528-Qwen3-8B-MLX-4bit",
"description": "Open source model lmstudio-community/DeepSeek-R1-0528-Qwen3-8B-MLX-4bit. 7 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 7,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"mlx",
"safetensors",
"qwen3",
"conversational",
"base_model:deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
"base_model:quantized:deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
"4-bit",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-7b-instruct-awq",
"name": "Qwen2.5 7B Instruct Awq",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct-AWQ",
"description": "Open source model Qwen/Qwen2.5-7B-Instruct-AWQ. 36 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 36,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"chat",
"conversational",
"en",
"arxiv:2309.00071",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-7B-Instruct",
"base_model:quantized:Qwen/Qwen2.5-7B-Instruct",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"awq",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "points-reader",
"name": "Points Reader",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/tencent/POINTS-Reader",
"description": "Open source model tencent/POINTS-Reader. 100 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 100,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"image-text-to-text",
"conversational",
"custom_code",
"arxiv:2509.01215",
"arxiv:2412.08443",
"arxiv:2409.04828",
"arxiv:2405.11850",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-4b-base",
"name": "Qwen3 4B Base",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-4B-Base",
"description": "Open source model Qwen/Qwen3-4B-Base. 80 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 80,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2505.09388",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 3,
"context_window_tokens": 4096,
"parameters_total_b": 4,
"parameters_active_b": 4,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "step-3.5-flash",
"name": "Step 3.5 Flash",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/stepfun-ai/Step-3.5-Flash",
"description": "Open source model stepfun-ai/Step-3.5-Flash. 621 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 621,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"step3p5",
"conversational",
"custom_code",
"arxiv:2602.10604",
"arxiv:2601.05593",
"arxiv:2507.19427",
"eval-results",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "kogpt2-base-v2",
"name": "Kogpt2 Base V2",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/skt/kogpt2-base-v2",
"description": "Open source model skt/kogpt2-base-v2. 60 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 60,
"language": "Python",
"license": "cc-by-nc-sa-4.0",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"jax",
"gpt2",
"ko",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "parler-tts-mini-multilingual-v1.1",
"name": "Parler Tts Mini Multilingual V1.1",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/parler-tts/parler-tts-mini-multilingual-v1.1",
"description": "Open source model parler-tts/parler-tts-mini-multilingual-v1.1. 54 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 54,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"parler_tts",
"text-to-speech",
"annotation",
"en",
"fr",
"es",
"pt",
"pl",
"de",
"nl",
"it",
"dataset:facebook/multilingual_librispeech",
"dataset:parler-tts/libritts_r_filtered",
"dataset:parler-tts/libritts-r-filtered-speaker-descriptions",
"dataset:parler-tts/mls_eng",
"dataset:parler-tts/mls-eng-speaker-descriptions",
"dataset:ylacombe/mls-annotated",
"dataset:ylacombe/cml-tts-filtered-annotated",
"dataset:PHBJT/cml-tts-filtered",
"arxiv:2402.01912",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-reranker-8b",
"name": "Qwen3 Reranker 8B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-Reranker-8B",
"description": "Open source model Qwen/Qwen3-Reranker-8B. 213 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 213,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"text-ranking",
"arxiv:2506.05176",
"base_model:Qwen/Qwen3-8B-Base",
"base_model:finetune:Qwen/Qwen3-8B-Base",
"text-embeddings-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "deepseek-r1-0528-qwen3-8b-mlx-8bit",
"name": "Deepseek R1 0528 Qwen3 8B Mlx 8Bit",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/lmstudio-community/DeepSeek-R1-0528-Qwen3-8B-MLX-8bit",
"description": "Open source model lmstudio-community/DeepSeek-R1-0528-Qwen3-8B-MLX-8bit. 13 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 13,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"mlx",
"safetensors",
"qwen3",
"conversational",
"base_model:deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
"base_model:quantized:deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
"8-bit",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "powermoe-3b",
"name": "Powermoe 3B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/ibm-research/PowerMoE-3b",
"description": "Open source model ibm-research/PowerMoE-3b. 14 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 14,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"granitemoe",
"arxiv:2408.13359",
"model-index",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 2,
"context_window_tokens": 4096,
"parameters_total_b": 3,
"parameters_active_b": 3,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llada-8b-instruct",
"name": "Llada 8B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/GSAI-ML/LLaDA-8B-Instruct",
"description": "Open source model GSAI-ML/LLaDA-8B-Instruct. 342 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 342,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llada",
"conversational",
"custom_code",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "apertus-8b-instruct-2509",
"name": "Apertus 8B Instruct 2509",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/swiss-ai/Apertus-8B-Instruct-2509",
"description": "Open source model swiss-ai/Apertus-8B-Instruct-2509. 435 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 435,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"apertus",
"multilingual",
"compliant",
"swiss-ai",
"conversational",
"arxiv:2509.14233",
"base_model:swiss-ai/Apertus-8B-2509",
"base_model:finetune:swiss-ai/Apertus-8B-2509",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-30b-a3b-gptq-int4",
"name": "Qwen3 30B A3B Gptq Int4",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-30B-A3B-GPTQ-Int4",
"description": "Open source model Qwen/Qwen3-30B-A3B-GPTQ-Int4. 45 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 45,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3_moe",
"conversational",
"arxiv:2309.00071",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-30B-A3B",
"base_model:quantized:Qwen/Qwen3-30B-A3B",
"endpoints_compatible",
"4-bit",
"gptq",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 21,
"context_window_tokens": 4096,
"parameters_total_b": 30,
"parameters_active_b": 30,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "tinyllama-1.1b-chat-v0.3-gptq",
"name": "Tinyllama 1.1B Chat V0.3 Gptq",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GPTQ",
"description": "Open source model TheBloke/TinyLlama-1.1B-Chat-v0.3-GPTQ. 9 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 9,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"en",
"dataset:cerebras/SlimPajama-627B",
"dataset:bigcode/starcoderdata",
"dataset:OpenAssistant/oasst_top1_2023-08-25",
"base_model:TinyLlama/TinyLlama-1.1B-Chat-v0.3",
"base_model:quantized:TinyLlama/TinyLlama-1.1B-Chat-v0.3",
"text-generation-inference",
"4-bit",
"gptq",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 1,
"parameters_active_b": 1,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "prot_t5_xl_bfd",
"name": "Prot_T5_Xl_Bfd",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Rostlab/prot_t5_xl_bfd",
"description": "Open source model Rostlab/prot_t5_xl_bfd. 10 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 10,
"language": "Python",
"license": "unknown",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"tf",
"t5",
"protein language model",
"dataset:BFD",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-4b-instruct-2507-unsloth-bnb-4bit",
"name": "Qwen3 4B Instruct 2507 Unsloth Bnb 4Bit",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/unsloth/Qwen3-4B-Instruct-2507-unsloth-bnb-4bit",
"description": "Open source model unsloth/Qwen3-4B-Instruct-2507-unsloth-bnb-4bit. 13 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 13,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"unsloth",
"conversational",
"arxiv:2505.09388",
"base_model:Qwen/Qwen3-4B-Instruct-2507",
"base_model:quantized:Qwen/Qwen3-4B-Instruct-2507",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"bitsandbytes",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 3,
"context_window_tokens": 4096,
"parameters_total_b": 4,
"parameters_active_b": 4,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "phi-3.5-mini-instruct",
"name": "Phi 3.5 Mini Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/microsoft/Phi-3.5-mini-instruct",
"description": "Open source model microsoft/Phi-3.5-mini-instruct. 963 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 963,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"phi3",
"nlp",
"code",
"conversational",
"custom_code",
"multilingual",
"arxiv:2404.14219",
"arxiv:2407.13833",
"arxiv:2403.06412",
"eval-results",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "meta-llama-3.1-8b-instruct-bnb-4bit",
"name": "Meta Llama 3.1 8B Instruct Bnb 4Bit",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
"description": "Open source model unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit. 95 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 95,
"language": "Python",
"license": "llama3.1",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"llama-3",
"meta",
"facebook",
"unsloth",
"conversational",
"en",
"arxiv:2204.05149",
"base_model:meta-llama/Llama-3.1-8B-Instruct",
"base_model:quantized:meta-llama/Llama-3.1-8B-Instruct",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"bitsandbytes",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "glm-4.7-flash-awq-4bit",
"name": "Glm 4.7 Flash Awq 4Bit",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/cyankiwi/GLM-4.7-Flash-AWQ-4bit",
"description": "Open source model cyankiwi/GLM-4.7-Flash-AWQ-4bit. 43 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 43,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"glm4_moe_lite",
"conversational",
"en",
"zh",
"arxiv:2508.06471",
"base_model:zai-org/GLM-4.7-Flash",
"base_model:quantized:zai-org/GLM-4.7-Flash",
"endpoints_compatible",
"compressed-tensors",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 3,
"context_window_tokens": 4096,
"parameters_total_b": 4,
"parameters_active_b": 4,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "dots.ocr",
"name": "Dots.Ocr",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/rednote-hilab/dots.ocr",
"description": "Open source model rednote-hilab/dots.ocr. 1243 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1243,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"dots_ocr",
"safetensors",
"image-to-text",
"ocr",
"document-parse",
"layout",
"table",
"formula",
"transformers",
"custom_code",
"image-text-to-text",
"conversational",
"en",
"zh",
"multilingual",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "mistral-7b-bnb-4bit",
"name": "Mistral 7B Bnb 4Bit",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/unsloth/mistral-7b-bnb-4bit",
"description": "Open source model unsloth/mistral-7b-bnb-4bit. 30 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 30,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"mistral",
"unsloth",
"mistral-7b",
"bnb",
"en",
"text-generation-inference",
"endpoints_compatible",
"4-bit",
"bitsandbytes",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "glm-5-fp8",
"name": "Glm 5 Fp8",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/zai-org/GLM-5-FP8",
"description": "Open source model zai-org/GLM-5-FP8. 108 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 108,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"glm_moe_dsa",
"conversational",
"en",
"zh",
"endpoints_compatible",
"fp8",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen-7b",
"name": "Qwen 7B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen-7B",
"description": "Open source model Qwen/Qwen-7B. 395 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 395,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen",
"custom_code",
"zh",
"en",
"arxiv:2309.16609",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwq-32b-awq",
"name": "Qwq 32B Awq",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/QwQ-32B-AWQ",
"description": "Open source model Qwen/QwQ-32B-AWQ. 133 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 133,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"safetensors",
"qwen2",
"chat",
"conversational",
"en",
"arxiv:2309.00071",
"arxiv:2412.15115",
"base_model:Qwen/QwQ-32B",
"base_model:quantized:Qwen/QwQ-32B",
"4-bit",
"awq",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 22,
"context_window_tokens": 4096,
"parameters_total_b": 32,
"parameters_active_b": 32,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "deepseek-r1-distill-llama-70b",
"name": "Deepseek R1 Distill Llama 70B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
"description": "Open source model deepseek-ai/DeepSeek-R1-Distill-Llama-70B. 741 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 741,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"conversational",
"arxiv:2501.12948",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 49,
"context_window_tokens": 4096,
"parameters_total_b": 70,
"parameters_active_b": 70,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-coder-7b",
"name": "Qwen2.5 Coder 7B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B",
"description": "Open source model Qwen/Qwen2.5-Coder-7B. 134 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 134,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen2",
"code",
"qwen",
"qwen-coder",
"codeqwen",
"conversational",
"en",
"arxiv:2409.12186",
"arxiv:2309.00071",
"arxiv:2407.10671",
"base_model:Qwen/Qwen2.5-7B",
"base_model:finetune:Qwen/Qwen2.5-7B",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen2.5-3b",
"name": "Qwen2.5 3B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen2.5-3B",
"description": "Open source model Qwen/Qwen2.5-3B. 169 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 169,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"safetensors",
"qwen2",
"conversational",
"en",
"arxiv:2407.10671",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 2,
"context_window_tokens": 4096,
"parameters_total_b": 3,
"parameters_active_b": 3,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "deepseek-v2-lite-chat",
"name": "Deepseek V2 Lite Chat",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/deepseek-ai/DeepSeek-V2-Lite-Chat",
"description": "Open source model deepseek-ai/DeepSeek-V2-Lite-Chat. 133 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 133,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"deepseek_v2",
"conversational",
"custom_code",
"arxiv:2405.04434",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "tiny-qwen3forcausallm",
"name": "Tiny Qwen3Forcausallm",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/trl-internal-testing/tiny-Qwen3ForCausalLM",
"description": "Open source model trl-internal-testing/tiny-Qwen3ForCausalLM. 1 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1,
"language": "Python",
"license": "unknown",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"trl",
"conversational",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "deepseek-coder-v2-lite-instruct",
"name": "Deepseek Coder V2 Lite Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
"description": "Open source model deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct. 539 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 539,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"deepseek_v2",
"conversational",
"custom_code",
"arxiv:2401.06066",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen3-0.6b-base",
"name": "Qwen3 0.6B Base",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen3-0.6B-Base",
"description": "Open source model Qwen/Qwen3-0.6B-Base. 146 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 146,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen3",
"conversational",
"arxiv:2505.09388",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 4,
"context_window_tokens": 4096,
"parameters_total_b": 6,
"parameters_active_b": 6,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "diffractgpt_mistral_chemical_formula",
"name": "Diffractgpt_Mistral_Chemical_Formula",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/knc6/diffractgpt_mistral_chemical_formula",
"description": "Open source model knc6/diffractgpt_mistral_chemical_formula. 1 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"peft",
"safetensors",
"chemistry",
"text-generation-inference",
"atomgpt",
"diffraction",
"en",
"base_model:unsloth/mistral-7b-bnb-4bit",
"base_model:adapter:unsloth/mistral-7b-bnb-4bit",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "qwen-7b-chat",
"name": "Qwen 7B Chat",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Qwen/Qwen-7B-Chat",
"description": "Open source model Qwen/Qwen-7B-Chat. 787 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 787,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"qwen",
"custom_code",
"zh",
"en",
"arxiv:2309.16609",
"arxiv:2305.08322",
"arxiv:2009.03300",
"arxiv:2305.05280",
"arxiv:2210.03629",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 5,
"context_window_tokens": 4096,
"parameters_total_b": 7,
"parameters_active_b": 7,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "nvidia-nemotron-3-nano-30b-a3b-nvfp4",
"name": "Nvidia Nemotron 3 Nano 30B A3B Nvfp4",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4",
"description": "Open source model nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4. 100 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 100,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"nemotron_h",
"feature-extraction",
"nvidia",
"pytorch",
"conversational",
"custom_code",
"en",
"es",
"fr",
"de",
"ja",
"it",
"dataset:nvidia/Nemotron-Pretraining-Code-v1",
"dataset:nvidia/Nemotron-CC-v2",
"dataset:nvidia/Nemotron-Pretraining-SFT-v1",
"dataset:nvidia/Nemotron-CC-Math-v1",
"dataset:nvidia/Nemotron-Pretraining-Code-v2",
"dataset:nvidia/Nemotron-Pretraining-Specialized-v1",
"dataset:nvidia/Nemotron-CC-v2.1",
"dataset:nvidia/Nemotron-CC-Code-v1",
"dataset:nvidia/Nemotron-Pretraining-Dataset-sample",
"dataset:nvidia/Nemotron-Competitive-Programming-v1",
"dataset:nvidia/Nemotron-Math-v2",
"dataset:nvidia/Nemotron-Agentic-v1",
"dataset:nvidia/Nemotron-Math-Proofs-v1",
"dataset:nvidia/Nemotron-Instruction-Following-Chat-v1",
"dataset:nvidia/Nemotron-Science-v1",
"dataset:nvidia/Nemotron-3-Nano-RL-Training-Blend",
"arxiv:2512.20848",
"arxiv:2512.20856",
"arxiv:2601.20088",
"base_model:nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
"base_model:quantized:nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
"region:us"
],
"hardware_req": "24GB+ VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 21,
"context_window_tokens": 4096,
"parameters_total_b": 30,
"parameters_active_b": 30,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "falcon-h1-tiny-90m-instruct",
"name": "Falcon H1 Tiny 90M Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/tiiuae/Falcon-H1-Tiny-90M-Instruct",
"description": "Open source model tiiuae/Falcon-H1-Tiny-90M-Instruct. 31 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 31,
"language": "Python",
"license": "other",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"falcon_h1",
"falcon-h1",
"edge",
"conversational",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "hermes-3-llama-3.2-3b",
"name": "Hermes 3 Llama 3.2 3B",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/NousResearch/Hermes-3-Llama-3.2-3B",
"description": "Open source model NousResearch/Hermes-3-Llama-3.2-3B. 174 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 174,
"language": "Python",
"license": "llama3",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"Llama-3",
"instruct",
"finetune",
"chatml",
"gpt4",
"synthetic data",
"distillation",
"function calling",
"json mode",
"axolotl",
"roleplaying",
"chat",
"conversational",
"en",
"arxiv:2408.11857",
"text-generation-inference",
"endpoints_compatible",
"deploy:azure",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 2,
"context_window_tokens": 4096,
"parameters_total_b": 3,
"parameters_active_b": 3,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "meta-llama-3.1-8b-instruct",
"name": "Meta Llama 3.1 8B Instruct",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/unsloth/Meta-Llama-3.1-8B-Instruct",
"description": "Open source model unsloth/Meta-Llama-3.1-8B-Instruct. 94 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 94,
"language": "Python",
"license": "llama3.1",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"llama-3",
"meta",
"facebook",
"unsloth",
"conversational",
"en",
"base_model:meta-llama/Llama-3.1-8B-Instruct",
"base_model:finetune:meta-llama/Llama-3.1-8B-Instruct",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "meta-llama-3.1-8b-instruct-gguf",
"name": "Meta Llama 3.1 8B Instruct Gguf",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
"description": "Open source model bartowski/Meta-Llama-3.1-8B-Instruct-GGUF. 321 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 321,
"language": "Python",
"license": "llama3.1",
"tags": [
"AI",
"LLM",
"gguf",
"facebook",
"meta",
"pytorch",
"llama",
"llama-3",
"en",
"de",
"fr",
"it",
"pt",
"hi",
"es",
"th",
"base_model:meta-llama/Llama-3.1-8B-Instruct",
"base_model:quantized:meta-llama/Llama-3.1-8B-Instruct",
"endpoints_compatible",
"region:us",
"conversational"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 6,
"context_window_tokens": 4096,
"parameters_total_b": 8,
"parameters_active_b": 8,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "deepseek-v3-0324",
"name": "Deepseek V3 0324",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/deepseek-ai/DeepSeek-V3-0324",
"description": "Open source model deepseek-ai/DeepSeek-V3-0324. 3087 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 3087,
"language": "Python",
"license": "mit",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"deepseek_v3",
"conversational",
"custom_code",
"arxiv:2412.19437",
"eval-results",
"text-generation-inference",
"endpoints_compatible",
"fp8",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "elm",
"name": "Elm",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/Joaoffg/ELM",
"description": "Open source model Joaoffg/ELM. 2 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 2,
"language": "Python",
"license": "llama2",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"academic",
"university",
"en",
"nl",
"arxiv:2408.06931",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "llama-2-13b-chat-hf",
"name": "Llama 2 13B Chat Hf",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/meta-llama/Llama-2-13b-chat-hf",
"description": "Open source model meta-llama/Llama-2-13b-chat-hf. 1109 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 1109,
"language": "Python",
"license": "llama2",
"tags": [
"AI",
"LLM",
"transformers",
"pytorch",
"safetensors",
"llama",
"facebook",
"meta",
"llama-2",
"conversational",
"en",
"arxiv:2307.09288",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "16GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 9,
"context_window_tokens": 4096,
"parameters_total_b": 13,
"parameters_active_b": 13,
"is_multimodal": false
},
"referral_url": ""
},
{
"slug": "svara-tts-v1",
"name": "Svara Tts V1",
"category": "AI Models",
"is_open_source": true,
"website": "https://huggingface.co/kenpath/svara-tts-v1",
"description": "Open source model kenpath/svara-tts-v1. 18 likes on Hugging Face.",
"pros": [
"Open Source",
"Running Locally"
],
"cons": [
"Requires GPU"
],
"stars": 18,
"language": "Python",
"license": "apache-2.0",
"tags": [
"AI",
"LLM",
"transformers",
"safetensors",
"llama",
"text-to-speech",
"speech-synthesis",
"multilingual",
"indic",
"orpheus",
"lora",
"low-latency",
"gguf",
"zero-shot",
"emotions",
"discrete-audio-tokens",
"hi",
"bn",
"mr",
"te",
"kn",
"bho",
"mag",
"hne",
"mai",
"as",
"brx",
"doi",
"gu",
"ml",
"pa",
"ta",
"ne",
"sa",
"en",
"dataset:SYSPIN",
"dataset:RASA",
"dataset:IndicTTS",
"dataset:SPICOR",
"base_model:canopylabs/3b-hi-ft-research_release",
"base_model:adapter:canopylabs/3b-hi-ft-research_release",
"text-generation-inference",
"endpoints_compatible",
"region:us"
],
"hardware_req": "8GB VRAM",
"hosting_type": "self-hosted",
"ai_metadata": {
"vram_inference_gb": 1,
"context_window_tokens": 4096,
"parameters_total_b": 0,
"parameters_active_b": 0,
"is_multimodal": false
},
"referral_url": ""
}
]