InferX Catalog | Qwen3.6-35B-A3B-FP8

Qwen3.6-35B-A3B-FP8

Qwen3.6-35B-A3B-FP8 (officially released on April 16, 2026) is the first natively quantized FP8 variant of the Qwen3.6 series.

Qwen multimodal image2text

Log in to deploy

Metadata

Provider

Qwen

Modality

multimodal

API type

image2text

Source

huggingface / Qwen/Qwen3.6-35B-A3B-FP8

Created

2026-04-19 13:53:37 UTC

Updated

2026-06-21 22:27:42 UTC

Catalog version

17

Visibility

Published

Specifications

Parameters

36.00B

MoE

Yes

Max model length

262000

Image

inferx/vllm-openai:v0.20.2-cu129

Default Deploy Config

GPU count

1

vRAM

70000 MB

Summary

1xGPU 70000 MB

Recommended Use Cases

—

Model Spec

{
    "image": "inferx/vllm-openai:v0.20.2-cu129",
    "commands": [
        "--model",
        "Qwen/Qwen3.6-35B-A3B-FP8",
        "--enable-auto-tool-choice",
        "--tool-call-parser",
        "qwen3_coder",
        "--trust-remote-code",
        "--gpu-memory-utilization",
        "0.92",
        "--max-model-len",
        "262000",
        "--max-num-seqs",
        "8",
        "--max-num-batched-tokens",
        "26200",
        "--mm-processor-cache-gb",
        "0",
        "--uvicorn-log-level",
        "warning",
        "--enable-prefix-caching",
        "--tensor-parallel-size",
        "1"
    ],
    "resources": {
        "GPU": {
            "Count": 1,
            "vRam": 70000
        }
    },
    "envs": [
        [
            "VLLM_SERVER_DEV_MODE",
            "1"
        ],
        [
            "VLLM_USE_DEEP_GEMM",
            "0"
        ]
    ],
    "policy": {
        "Obj": {
            "min_replica": 0,
            "max_replica": 1,
            "standby_per_node": 1,
            "parallel": 50,
            "queue_len": 100,
            "queue_timeout": 30.0,
            "scalein_timeout": 1.0,
            "scaleout_policy": {
                "WaitQueueRatio": {
                    "wait_ratio": 0.1
                }
            },
            "runtime_config": {
                "graph_sync": false
            }
        }
    },
    "sample_query": {
        "body": {
            "max_tokens": "200",
            "temperature": "0"
        },
        "path": "v1/chat/completions",
        "prompt": "What is in this image?",
        "apiType": "image2text",
        "dataUrl": "https://www.ilankelman.org/stopsigns/australia.jpg",
        "prompts": [],
        "loadingTimeout": 90
    }
}