{"data":[{"id":"gpt-5.5-pro","name":"OpenAI: GPT-5.5 Pro","created":1777051896,"description":"GPT-5.5 Pro is OpenAI’s high-capability model optimized for deep reasoning and accuracy on complex, high-stakes workloads. It features a 1M+ token context window (922K input, 128K output) with support for...","context_length":1050000,"architecture":{"modality":"text+image+file->text","input_modalities":["file","image","text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":1.25685e-05,"completion":7.541100000000001e-05,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":13.196925,"max_completion_cost":9.652608000000003,"max_cost":21.240765000000003},"sats_pricing":{"prompt":0.015607416083828162,"completion":0.09364449650296898,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":16387.78688801957,"max_completion_cost":11986.49555238003,"max_cost":26376.533181669598},"per_request_limits":null,"top_provider":{"context_length":1050000,"max_completion_tokens":128000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-5.5-pro-20260423","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-5.5","name":"OpenAI: GPT-5.5","created":1777051893,"description":"GPT-5.5 is OpenAI’s frontier model designed for complex professional workloads, building on GPT-5.4 with stronger reasoning, higher reliability, and improved token efficiency on hard tasks. It features a 1M+ token...","context_length":1050000,"architecture":{"modality":"text+image+file->text","input_modalities":["file","image","text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":2.09475e-06,"completion":1.25685e-05,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":1.995e-07,"input_cache_write":0.0,"max_prompt_cost":2.1994875,"max_completion_cost":1.608768,"max_cost":3.5401275},"sats_pricing":{"prompt":0.0026012360139713604,"completion":0.015607416083828162,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":0.0002477367632353677,"input_cache_write":0.0,"max_prompt_cost":2731.2978146699284,"max_completion_cost":1997.7492587300046,"max_cost":4396.088863611599},"per_request_limits":null,"top_provider":{"context_length":1050000,"max_completion_tokens":128000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-5.5-20260423","alias_ids":null,"forwarded_model_id":null},{"id":"claude-opus-4.7","name":"Anthropic: Claude Opus 4.7","created":1776351100,"description":"Opus 4.7 is the next generation of Anthropic's Opus family, built for long-running, asynchronous agents. Building on the coding and agentic strengths of Opus 4.6, it delivers stronger performance on...","context_length":1000000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":2.09475e-06,"completion":1.0473750000000001e-05,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":1.995e-07,"input_cache_write":2.49375e-06,"max_prompt_cost":2.0947500000000003,"max_completion_cost":1.34064,"max_cost":3.167262},"sats_pricing":{"prompt":0.0026012360139713604,"completion":0.013006180069856802,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":0.0002477367632353677,"input_cache_write":0.0030967095404420956,"max_prompt_cost":2601.2360139713605,"max_completion_cost":1664.7910489416706,"max_cost":3933.0688531246965},"per_request_limits":null,"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"anthropic/claude-4.7-opus-20260416","alias_ids":null,"forwarded_model_id":null},{"id":"grok-4.20","name":"xAI: Grok 4.20","created":1774979019,"description":"Grok 4.20 is xAI's newest flagship model with industry-leading speed and agentic tool calling capabilities. It combines the lowest hallucination rate on the market with strict prompt adherance, delivering consistently...","context_length":2000000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Grok","instruct_type":null},"pricing":{"prompt":5.236875e-07,"completion":1.047375e-06,"request":0.0,"image":0.0,"web_search":0.0019950000000000002,"internal_reasoning":0.0,"input_cache_read":7.98e-08,"input_cache_write":0.0,"max_prompt_cost":1.0473750000000002,"max_completion_cost":2.0947500000000003,"max_cost":2.0947500000000003},"sats_pricing":{"prompt":0.0006503090034928401,"completion":0.0013006180069856802,"request":0.001,"image":0.0,"web_search":2.4773676323536766,"internal_reasoning":0.0,"input_cache_read":9.909470529414706e-05,"input_cache_write":0.0,"max_prompt_cost":1300.6180069856803,"max_completion_cost":2601.2360139713605,"max_cost":2601.2360139713605},"per_request_limits":null,"top_provider":{"context_length":2000000,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"x-ai/grok-4.20-20260309","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-5.4-nano","name":"OpenAI: GPT-5.4 Nano","created":1773748187,"description":"GPT-5.4 nano is the most lightweight and cost-efficient variant of the GPT-5.4 family, optimized for speed-critical and high-volume tasks. It supports text and image inputs and is designed for low-latency...","context_length":400000,"architecture":{"modality":"text+image+file->text","input_modalities":["file","image","text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":8.379000000000001e-08,"completion":5.236875e-07,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":7.980000000000001e-09,"input_cache_write":0.0,"max_prompt_cost":0.033516000000000004,"max_completion_cost":0.06703200000000001,"max_cost":0.08982288000000001},"sats_pricing":{"prompt":0.00010404944055885442,"completion":0.0006503090034928401,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":9.909470529414706e-06,"input_cache_write":0.0,"max_prompt_cost":41.61977622354177,"max_completion_cost":83.23955244708354,"max_cost":111.54100027909193},"per_request_limits":null,"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-5.4-nano-20260317","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-5.4-mini","name":"OpenAI: GPT-5.4 Mini","created":1773748178,"description":"GPT-5.4 mini brings the core capabilities of GPT-5.4 to a faster, more efficient model optimized for high-throughput workloads. It supports text and image inputs with strong performance across reasoning, coding,...","context_length":400000,"architecture":{"modality":"text+image+file->text","input_modalities":["file","image","text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":3.142125e-07,"completion":1.885275e-06,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":2.9925e-08,"input_cache_write":0.0,"max_prompt_cost":0.12568500000000002,"max_completion_cost":0.24131519999999998,"max_cost":0.326781},"sats_pricing":{"prompt":0.00039018540209570404,"completion":0.002341112412574224,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":3.7160514485305145e-05,"input_cache_write":0.0,"max_prompt_cost":156.07416083828164,"max_completion_cost":299.66238880950067,"max_cost":405.79281817953216},"per_request_limits":null,"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-5.4-mini-20260317","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-5.3-chat","name":"OpenAI: GPT-5.3 Chat","created":1772564061,"description":"GPT-5.3 Chat is an update to ChatGPT's most-used model that makes everyday conversations smoother, more useful, and more directly helpful. It delivers more accurate answers with better contextualization and significantly...","context_length":128000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":7.331625e-07,"completion":5.8653e-06,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":6.9825e-08,"input_cache_write":0.0,"max_prompt_cost":0.0938448,"max_completion_cost":0.0960970752,"max_cost":0.1779297408},"sats_pricing":{"prompt":0.0009104326048899762,"completion":0.0072834608391198095,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":8.670786713237869e-05,"input_cache_write":0.0,"max_prompt_cost":116.53537342591694,"max_completion_cost":119.33222238813896,"max_cost":220.95106801553854},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-5.3-chat-20260303","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-5.3-codex","name":"OpenAI: GPT-5.3-Codex","created":1771959164,"description":"GPT-5.3-Codex is OpenAI’s most advanced agentic coding model, combining the frontier software engineering performance of GPT-5.2-Codex with the broader reasoning and professional knowledge capabilities of GPT-5.2. It achieves state-of-the-art results...","context_length":400000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":7.331625e-07,"completion":5.8653e-06,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":6.9825e-08,"input_cache_write":0.0,"max_prompt_cost":0.293265,"max_completion_cost":0.7507584,"max_cost":0.9501786000000001},"sats_pricing":{"prompt":0.0009104326048899762,"completion":0.0072834608391198095,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":8.670786713237869e-05,"input_cache_write":0.0,"max_prompt_cost":364.1730419559904,"max_completion_cost":932.2829874073356,"max_cost":1179.920655937409},"per_request_limits":null,"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-5.3-codex-20260224","alias_ids":null,"forwarded_model_id":null},{"id":"claude-sonnet-4.6","name":"Anthropic: Claude Sonnet 4.6","created":1771342990,"description":"Sonnet 4.6 is Anthropic's most capable Sonnet-class model yet, with frontier performance across coding, agents, and professional work. It excels at iterative development, complex codebase navigation, end-to-end project management with...","context_length":1000000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":1.25685e-06,"completion":6.28425e-06,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":1.197e-07,"input_cache_write":1.49625e-06,"max_prompt_cost":1.25685,"max_completion_cost":0.804384,"max_cost":1.9003572},"sats_pricing":{"prompt":0.0015607416083828162,"completion":0.007803708041914081,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":0.00014864205794122058,"input_cache_write":0.0018580257242652575,"max_prompt_cost":1560.7416083828161,"max_completion_cost":998.8746293650023,"max_cost":2359.841311874818},"per_request_limits":null,"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"anthropic/claude-4.6-sonnet-20260217","alias_ids":null,"forwarded_model_id":null},{"id":"gemini-3-flash-preview","name":"Google: Gemini 3 Flash Preview","created":1765987078,"description":"Gemini 3 Flash Preview is a high speed, high value thinking model designed for agentic workflows, multi turn chat, and coding assistance. It delivers near Pro level reasoning and tool...","context_length":1048576,"architecture":{"modality":"text+image+file+audio+video->text","input_modalities":["text","image","file","audio","video"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":1.3965e-07,"completion":8.379000000000002e-07,"request":0.0,"image":1.995e-07,"web_search":0.005586000000000001,"internal_reasoning":1.1970000000000001e-06,"input_cache_read":1.995e-08,"input_cache_write":3.325e-08,"max_prompt_cost":0.1464336384,"max_completion_cost":0.05491261440000001,"max_cost":0.1921941504},"sats_pricing":{"prompt":0.00017341573426475737,"completion":0.0010404944055885444,"request":0.001,"image":0.0002477367632353677,"web_search":6.936629370590294,"internal_reasoning":0.001486420579412206,"input_cache_read":2.4773676323536764e-05,"input_cache_write":4.1289460539227946e-05,"max_prompt_cost":181.83957697240223,"max_completion_cost":68.18984136465085,"max_cost":238.66444477627792},"per_request_limits":null,"top_provider":{"context_length":1048576,"max_completion_tokens":65536,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"google/gemini-3-flash-preview-20251217","alias_ids":null,"forwarded_model_id":null},{"id":"claude-haiku-4.5","name":"Anthropic: Claude Haiku 4.5","created":1760547638,"description":"Claude Haiku 4.5 is Anthropic’s fastest and most efficient model, delivering near-frontier intelligence at a fraction of the cost and latency of larger Claude models. Matching Claude Sonnet 4’s performance...","context_length":200000,"architecture":{"modality":"text+image->text","input_modalities":["image","text"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":4.189500000000001e-07,"completion":2.09475e-06,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":3.99e-08,"input_cache_write":4.987500000000001e-07,"max_prompt_cost":0.08379000000000002,"max_completion_cost":0.13406400000000002,"max_cost":0.19104120000000002},"sats_pricing":{"prompt":0.0005202472027942722,"completion":0.0026012360139713604,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":4.954735264707353e-05,"input_cache_write":0.0006193419080884193,"max_prompt_cost":104.04944055885443,"max_completion_cost":166.47910489416708,"max_cost":237.23272447418807},"per_request_limits":null,"top_provider":{"context_length":200000,"max_completion_tokens":64000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"anthropic/claude-4.5-haiku-20251001","alias_ids":null,"forwarded_model_id":null},{"id":"grok-4.3","name":"xAI: Grok 4.3","created":1777591821,"description":"Grok 4.3 is a reasoning model from xAI. It accepts text and image inputs with text output, and is suited for agentic workflows, instruction-following tasks, and applications requiring high factual...","context_length":1000000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Grok","instruct_type":null},"pricing":{"prompt":5.236875e-07,"completion":1.047375e-06,"request":0.0,"image":0.0,"web_search":0.0019950000000000002,"internal_reasoning":0.0,"input_cache_read":7.98e-08,"input_cache_write":0.0,"max_prompt_cost":0.5236875000000001,"max_completion_cost":1.0473750000000002,"max_cost":1.0473750000000002},"sats_pricing":{"prompt":0.0006503090034928401,"completion":0.0013006180069856802,"request":0.001,"image":0.0,"web_search":2.4773676323536766,"internal_reasoning":0.0,"input_cache_read":9.909470529414706e-05,"input_cache_write":0.0,"max_prompt_cost":650.3090034928401,"max_completion_cost":1300.6180069856803,"max_cost":1300.6180069856803},"per_request_limits":null,"top_provider":{"context_length":1000000,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"x-ai/grok-4.3-20260430","alias_ids":null,"forwarded_model_id":null},{"id":"granite-4.1-8b","name":"IBM: Granite 4.1 8B","created":1777577071,"description":"Granite 4.1 8B is a dense, decoder-only 8-billion-parameter language model from IBM, part of the Granite 4.1 family. It supports a 131K-token context window and is designed for enterprise tasks...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":2.0947500000000003e-08,"completion":4.1895000000000006e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.995e-08,"input_cache_write":0.0,"max_prompt_cost":0.0027456307200000004,"max_completion_cost":0.005491261440000001,"max_cost":0.005491261440000001},"sats_pricing":{"prompt":2.6012360139713605e-05,"completion":5.202472027942721e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":2.4773676323536764e-05,"input_cache_write":0.0,"max_prompt_cost":3.4094920682325416,"max_completion_cost":6.818984136465083,"max_cost":6.818984136465083},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":131072,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"ibm-granite/granite-4.1-8b-20260429","alias_ids":null,"forwarded_model_id":null},{"id":"nemotron-3-nano-omni-30b-a3b-reasoning","name":"Nemotron 3 Nano Omni","created":1777393095,"description":"PPQ.AI model","context_length":256000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Unknown","instruct_type":null},"pricing":{"prompt":1.25685e-07,"completion":4.189500000000001e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.03217536,"max_completion_cost":0.10725120000000002,"max_cost":0.10725120000000002},"sats_pricing":{"prompt":0.00015607416083828163,"completion":0.0005202472027942722,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":39.9549851746001,"max_completion_cost":133.18328391533367,"max_cost":133.18328391533367},"per_request_limits":null,"top_provider":null,"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":null,"alias_ids":null,"forwarded_model_id":null},{"id":"laguna-xs.2","name":"Laguna XS.2","created":1777389604,"description":"PPQ.AI model","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Unknown","instruct_type":null},"pricing":{"prompt":1.25685e-07,"completion":4.189500000000001e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.01647378432,"max_completion_cost":0.05491261440000001,"max_cost":0.05491261440000001},"sats_pricing":{"prompt":0.00015607416083828163,"completion":0.0005202472027942722,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":20.45695240939525,"max_completion_cost":68.18984136465085,"max_cost":68.18984136465085},"per_request_limits":null,"top_provider":null,"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":null,"alias_ids":null,"forwarded_model_id":null},{"id":"laguna-m.1","name":"Laguna M.1","created":1777388504,"description":"PPQ.AI model","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Unknown","instruct_type":null},"pricing":{"prompt":1.25685e-07,"completion":4.189500000000001e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.01647378432,"max_completion_cost":0.05491261440000001,"max_cost":0.05491261440000001},"sats_pricing":{"prompt":0.00015607416083828163,"completion":0.0005202472027942722,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":20.45695240939525,"max_completion_cost":68.18984136465085,"max_cost":68.18984136465085},"per_request_limits":null,"top_provider":null,"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":null,"alias_ids":null,"forwarded_model_id":null},{"id":"claude-haiku-latest","name":"Anthropic Claude Haiku Latest","created":1777318492,"description":"This model always redirects to the latest model in the Anthropic Claude Haiku family.","context_length":200000,"architecture":{"modality":"text+image->text","input_modalities":["image","text"],"output_modalities":["text"],"tokenizer":"Router","instruct_type":null},"pricing":{"prompt":4.189500000000001e-07,"completion":2.09475e-06,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":3.99e-08,"input_cache_write":4.987500000000001e-07,"max_prompt_cost":0.08379000000000002,"max_completion_cost":0.13406400000000002,"max_cost":0.19104120000000002},"sats_pricing":{"prompt":0.0005202472027942722,"completion":0.0026012360139713604,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":4.954735264707353e-05,"input_cache_write":0.0006193419080884193,"max_prompt_cost":104.04944055885443,"max_completion_cost":166.47910489416708,"max_cost":237.23272447418807},"per_request_limits":null,"top_provider":{"context_length":200000,"max_completion_tokens":64000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"~anthropic/claude-haiku-latest","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-mini-latest","name":"OpenAI GPT Mini Latest","created":1777318471,"description":"This model always redirects to the latest model in the OpenAI GPT Mini family.","context_length":400000,"architecture":{"modality":"text+image+file->text","input_modalities":["file","image","text"],"output_modalities":["text"],"tokenizer":"Router","instruct_type":null},"pricing":{"prompt":3.142125e-07,"completion":1.885275e-06,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":2.9925e-08,"input_cache_write":0.0,"max_prompt_cost":0.12568500000000002,"max_completion_cost":0.24131519999999998,"max_cost":0.326781},"sats_pricing":{"prompt":0.00039018540209570404,"completion":0.002341112412574224,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":3.7160514485305145e-05,"input_cache_write":0.0,"max_prompt_cost":156.07416083828164,"max_completion_cost":299.66238880950067,"max_cost":405.79281817953216},"per_request_limits":null,"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"~openai/gpt-mini-latest","alias_ids":null,"forwarded_model_id":null},{"id":"gemini-pro-latest","name":"Google Gemini Pro Latest","created":1777318451,"description":"This model always redirects to the latest model in the Google Gemini Pro family.","context_length":1048576,"architecture":{"modality":"text+image+file+audio+video->text","input_modalities":["audio","file","image","text","video"],"output_modalities":["text"],"tokenizer":"Router","instruct_type":null},"pricing":{"prompt":5.586e-07,"completion":3.3516000000000007e-06,"request":0.0,"image":7.98e-07,"web_search":0.005586000000000001,"internal_reasoning":4.7880000000000006e-06,"input_cache_read":7.98e-08,"input_cache_write":1.4962500000000002e-07,"max_prompt_cost":0.5857345536,"max_completion_cost":0.21965045760000004,"max_cost":0.7687766016},"sats_pricing":{"prompt":0.0006936629370590295,"completion":0.004161977622354178,"request":0.001,"image":0.0009909470529414707,"web_search":6.936629370590294,"internal_reasoning":0.005945682317648824,"input_cache_read":9.909470529414706e-05,"input_cache_write":0.00018580257242652574,"max_prompt_cost":727.3583078896089,"max_completion_cost":272.7593654586034,"max_cost":954.6577791051117},"per_request_limits":null,"top_provider":{"context_length":1048576,"max_completion_tokens":65536,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"~google/gemini-pro-latest","alias_ids":null,"forwarded_model_id":null},{"id":"kimi-latest","name":"MoonshotAI Kimi Latest","created":1777318428,"description":"This model always redirects to the latest model in the MoonshotAI Kimi family.","context_length":262142,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Router","instruct_type":null},"pricing":{"prompt":3.1002300000000005e-07,"completion":1.4621355e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":5.5860000000000005e-08,"input_cache_write":0.0,"max_prompt_cost":0.08127004926600001,"max_completion_cost":0.383287124241,"max_cost":0.383287124241},"sats_pricing":{"prompt":0.0003849829300677614,"completion":0.0018156627377520094,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":6.936629370590294e-05,"input_cache_write":0.0,"max_prompt_cost":100.9201952538231,"max_completion_cost":475.96146139978725,"max_cost":475.96146139978725},"per_request_limits":null,"top_provider":{"context_length":262142,"max_completion_tokens":262142,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"~moonshotai/kimi-latest","alias_ids":null,"forwarded_model_id":null},{"id":"gemini-flash-latest","name":"Google Gemini Flash Latest","created":1777318398,"description":"This model always redirects to the latest model in the Google Gemini Flash family.","context_length":1048576,"architecture":{"modality":"text+image+file+audio+video->text","input_modalities":["text","image","file","audio","video"],"output_modalities":["text"],"tokenizer":"Router","instruct_type":null},"pricing":{"prompt":1.3965e-07,"completion":8.379000000000002e-07,"request":0.0,"image":1.995e-07,"web_search":0.005586000000000001,"internal_reasoning":1.1970000000000001e-06,"input_cache_read":1.995e-08,"input_cache_write":3.325e-08,"max_prompt_cost":0.1464336384,"max_completion_cost":0.05491261440000001,"max_cost":0.1921941504},"sats_pricing":{"prompt":0.00017341573426475737,"completion":0.0010404944055885444,"request":0.001,"image":0.0002477367632353677,"web_search":6.936629370590294,"internal_reasoning":0.001486420579412206,"input_cache_read":2.4773676323536764e-05,"input_cache_write":4.1289460539227946e-05,"max_prompt_cost":181.83957697240223,"max_completion_cost":68.18984136465085,"max_cost":238.66444477627792},"per_request_limits":null,"top_provider":{"context_length":1048576,"max_completion_tokens":65536,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"~google/gemini-flash-latest","alias_ids":null,"forwarded_model_id":null},{"id":"claude-sonnet-latest","name":"Anthropic Claude Sonnet Latest","created":1777318368,"description":"This model always redirects to the latest model in the Anthropic Claude Sonnet family.","context_length":1000000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Router","instruct_type":null},"pricing":{"prompt":1.25685e-06,"completion":6.28425e-06,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":1.197e-07,"input_cache_write":1.49625e-06,"max_prompt_cost":1.25685,"max_completion_cost":0.804384,"max_cost":1.9003572},"sats_pricing":{"prompt":0.0015607416083828162,"completion":0.007803708041914081,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":0.00014864205794122058,"input_cache_write":0.0018580257242652575,"max_prompt_cost":1560.7416083828161,"max_completion_cost":998.8746293650023,"max_cost":2359.841311874818},"per_request_limits":null,"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"~anthropic/claude-sonnet-latest","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-latest","name":"OpenAI GPT Latest","created":1777318334,"description":"This model always redirects to the latest model in the OpenAI GPT family.","context_length":1050000,"architecture":{"modality":"text+image+file->text","input_modalities":["file","image","text"],"output_modalities":["text"],"tokenizer":"Router","instruct_type":null},"pricing":{"prompt":2.09475e-06,"completion":1.25685e-05,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":1.995e-07,"input_cache_write":0.0,"max_prompt_cost":2.1994875,"max_completion_cost":1.608768,"max_cost":3.5401275},"sats_pricing":{"prompt":0.0026012360139713604,"completion":0.015607416083828162,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":0.0002477367632353677,"input_cache_write":0.0,"max_prompt_cost":2731.2978146699284,"max_completion_cost":1997.7492587300046,"max_cost":4396.088863611599},"per_request_limits":null,"top_provider":{"context_length":1050000,"max_completion_tokens":128000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"~openai/gpt-latest","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3.5-plus-20260420","name":"Qwen: Qwen3.5 Plus 2026-04-20","created":1777261368,"description":"Qwen3.5 Plus (April 2026) is a large-scale multimodal language model from Alibaba. It accepts text, image, and video input and produces text output, with a 1M token context window. This...","context_length":1000000,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":1.6758000000000002e-07,"completion":1.00548e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.16758000000000003,"max_completion_cost":0.06589513728,"max_cost":0.2224926144},"sats_pricing":{"prompt":0.00020809888111770884,"completion":0.001248593286706253,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":208.09888111770886,"max_completion_cost":81.827809637581,"max_cost":276.2887224823597},"per_request_limits":null,"top_provider":{"context_length":1000000,"max_completion_tokens":65536,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3.5-plus-20260420","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3.6-flash","name":"Qwen: Qwen3.6 Flash","created":1777261362,"description":"Qwen3.6 Flash is a fast, efficient language model from Alibaba's Qwen 3.6 series. It supports text, image, and video input with a 1M token context window. Tiered pricing kicks in...","context_length":1000000,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":1.0473750000000002e-07,"completion":6.28425e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":1.2468750000000002e-07,"max_prompt_cost":0.10473750000000003,"max_completion_cost":0.0411844608,"max_cost":0.13905788400000002},"sats_pricing":{"prompt":0.00013006180069856805,"completion":0.0007803708041914081,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.00015483547702210482,"max_prompt_cost":130.06180069856805,"max_completion_cost":51.14238102348812,"max_cost":172.6804515514748},"per_request_limits":null,"top_provider":{"context_length":1000000,"max_completion_tokens":65536,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3.6-flash","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3.6-35b-a3b","name":"Qwen: Qwen3.6 35B A3B","created":1777260255,"description":"Qwen3.6-35B-A3B is an open-weight multimodal model from Alibaba Cloud with 35 billion total parameters and 3 billion active parameters per token. It uses a hybrid sparse mixture-of-experts architecture combining Gated...","context_length":262144,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":null},"pricing":{"prompt":6.753474e-08,"completion":4.043914875e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.995e-08,"input_cache_write":0.0,"max_prompt_cost":0.01770382688256,"max_completion_cost":0.1060088020992,"max_cost":0.1060088020992},"sats_pricing":{"prompt":8.386384909043665e-05,"completion":0.000502168612497171,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":2.4773676323536764e-05,"input_cache_write":0.0,"max_prompt_cost":21.984404855963426,"max_completion_cost":131.6404887544584,"max_cost":131.6404887544584},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":262144,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3.6-35b-a3b-20260415","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3.6-max-preview","name":"Qwen: Qwen3.6 Max Preview","created":1777260242,"description":"Qwen3.6-Max-Preview is a proprietary frontier model from Alibaba Cloud built on a sparse mixture-of-experts architecture with approximately 1 trillion total parameters. It is optimized for agentic coding, tool use, and...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":null},"pricing":{"prompt":4.357080000000001e-07,"completion":2.614248e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":5.187e-07,"max_prompt_cost":0.11421823795200002,"max_completion_cost":0.171327356928,"max_cost":0.256991035392},"sats_pricing":{"prompt":0.0005410570909060431,"completion":0.0032463425454362575,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0006441155844119559,"max_prompt_cost":141.83487003847375,"max_completion_cost":212.75230505771057,"max_cost":319.1284575865659},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":65536,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3.6-max-preview-20260420","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3.6-27b","name":"Qwen: Qwen3.6 27B","created":1777255064,"description":"Qwen3.6 27B is a dense 27-billion-parameter language model from the Qwen Team at Alibaba, released in April 2026. It features hybrid multimodal capabilities — accepting text, image, and video inputs...","context_length":262144,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":1.3406400000000003e-07,"completion":1.3406400000000002e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.03514407321600001,"max_completion_cost":0.10982522880000001,"max_cost":0.13398677913600002},"sats_pricing":{"prompt":0.00016647910489416709,"completion":0.0016647910489416707,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":43.64149847337654,"max_completion_cost":136.37968272930166,"max_cost":166.38321292974803},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":81920,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3.6-27b-20260422","alias_ids":null,"forwarded_model_id":null},{"id":"deepseek-v4-pro","name":"DeepSeek: DeepSeek V4 Pro","created":1777000679,"description":"DeepSeek V4 Pro is a large-scale Mixture-of-Experts model from DeepSeek with 1.6T total parameters and 49B activated parameters, supporting a 1M-token context window. It is designed for advanced reasoning, coding,...","context_length":1048576,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":null},"pricing":{"prompt":1.8224325e-07,"completion":3.644865e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.4463750000000002e-09,"input_cache_write":0.0,"max_prompt_cost":0.191095898112,"max_completion_cost":0.13996281600000002,"max_cost":0.26107730611200003},"sats_pricing":{"prompt":0.00022630753321550834,"completion":0.0004526150664310167,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.7960915334564156e-06,"input_cache_write":0.0,"max_prompt_cost":237.30064794898487,"max_completion_cost":173.80418550951043,"max_cost":324.2027407037401},"per_request_limits":null,"top_provider":{"context_length":1048576,"max_completion_tokens":384000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"deepseek/deepseek-v4-pro-20260423","alias_ids":null,"forwarded_model_id":null},{"id":"deepseek-v4-flash","name":"DeepSeek: DeepSeek V4 Flash","created":1777000666,"description":"DeepSeek V4 Flash is an efficiency-optimized Mixture-of-Experts model from DeepSeek with 284B total parameters and 13B activated parameters, supporting a 1M-token context window. It is designed for fast inference and...","context_length":1048576,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":null},"pricing":{"prompt":5.8652999999999996e-08,"completion":1.1730599999999999e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.1172e-09,"input_cache_write":0.0,"max_prompt_cost":0.061502128127999996,"max_completion_cost":0.045045504,"max_cost":0.084024880128},"sats_pricing":{"prompt":7.283460839119808e-05,"completion":0.00014566921678239617,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.387325874118059e-06,"input_cache_write":0.0,"max_prompt_cost":76.37262232840892,"max_completion_cost":55.93697924444013,"max_cost":104.34111195062899},"per_request_limits":null,"top_provider":{"context_length":1048576,"max_completion_tokens":384000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"deepseek/deepseek-v4-flash-20260423","alias_ids":null,"forwarded_model_id":null},{"id":"ling-2.6-1t","name":"Ling-2.6-1T","created":1776948238,"description":"PPQ.AI model","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Unknown","instruct_type":null},"pricing":{"prompt":1.25685e-07,"completion":4.189500000000001e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.03294756864,"max_completion_cost":0.10982522880000002,"max_cost":0.10982522880000002},"sats_pricing":{"prompt":0.00015607416083828163,"completion":0.0005202472027942722,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":40.9139048187905,"max_completion_cost":136.3796827293017,"max_cost":136.3796827293017},"per_request_limits":null,"top_provider":null,"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":null,"alias_ids":null,"forwarded_model_id":null},{"id":"hy3-preview","name":"Hy3 preview","created":1776878150,"description":"PPQ.AI model","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Unknown","instruct_type":null},"pricing":{"prompt":1.25685e-07,"completion":4.189500000000001e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.03294756864,"max_completion_cost":0.10982522880000002,"max_cost":0.10982522880000002},"sats_pricing":{"prompt":0.00015607416083828163,"completion":0.0005202472027942722,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":40.9139048187905,"max_completion_cost":136.3796827293017,"max_cost":136.3796827293017},"per_request_limits":null,"top_provider":null,"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":null,"alias_ids":null,"forwarded_model_id":null},{"id":"mimo-v2.5-pro","name":"Xiaomi: MiMo-V2.5-Pro","created":1776874273,"description":"MiMo-V2.5-Pro is Xiaomi’s flagship model, delivering strong performance in general agentic capabilities, complex software engineering, and long-horizon tasks, with top rankings on benchmarks such as ClawEval, GDPVal, and SWE-bench Pro....","context_length":1048576,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":4.189500000000001e-07,"completion":1.25685e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":7.98e-08,"input_cache_write":0.0,"max_prompt_cost":0.4393009152000001,"max_completion_cost":0.1647378432,"max_cost":0.5491261440000001},"sats_pricing":{"prompt":0.0005202472027942722,"completion":0.0015607416083828162,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":9.909470529414706e-05,"input_cache_write":0.0,"max_prompt_cost":545.5187309172068,"max_completion_cost":204.56952409395248,"max_cost":681.8984136465084},"per_request_limits":null,"top_provider":{"context_length":1048576,"max_completion_tokens":131072,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"xiaomi/mimo-v2.5-pro-20260422","alias_ids":null,"forwarded_model_id":null},{"id":"mimo-v2.5","name":"Xiaomi: MiMo-V2.5","created":1776874269,"description":"MiMo-V2.5 is a native omnimodal model by Xiaomi. It delivers Pro-level agentic performance at roughly half the inference cost, while surpassing MiMo-V2-Omni in multimodal perception across image and video understanding...","context_length":1048576,"architecture":{"modality":"text+image+audio+video->text","input_modalities":["text","audio","image","video"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":1.6758000000000002e-07,"completion":8.379000000000002e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":3.1920000000000004e-08,"input_cache_write":0.0,"max_prompt_cost":0.17572036608000002,"max_completion_cost":0.10982522880000002,"max_cost":0.26358054912000006},"sats_pricing":{"prompt":0.00020809888111770884,"completion":0.0010404944055885444,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":3.9637882117658825e-05,"input_cache_write":0.0,"max_prompt_cost":218.20749236688266,"max_completion_cost":136.3796827293017,"max_cost":327.31123855032405},"per_request_limits":null,"top_provider":{"context_length":1048576,"max_completion_tokens":131072,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"xiaomi/mimo-v2.5-20260422","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-5.4-image-2","name":"OpenAI: GPT-5.4 Image 2","created":1776797528,"description":"[GPT-5.4](https://openrouter.ai/openai/gpt-5.4) Image 2 combines OpenAI's GPT-5.4 model with state-of-the-art image generation capabilities from GPT Image 2. It enables rich multimodal workflows, allowing users to seamlessly move between reasoning, coding, and...","context_length":272000,"architecture":{"modality":"text+image+file->text+image","input_modalities":["image","text","file"],"output_modalities":["image","text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":3.3516000000000007e-06,"completion":6.28425e-06,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":7.98e-07,"input_cache_write":0.0,"max_prompt_cost":0.9116352000000002,"max_completion_cost":0.804384,"max_cost":1.2870144000000001},"sats_pricing":{"prompt":0.004161977622354178,"completion":0.007803708041914081,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":0.0009909470529414707,"input_cache_write":0.0,"max_prompt_cost":1132.0579132803362,"max_completion_cost":998.8746293650023,"max_cost":1598.1994069840039},"per_request_limits":null,"top_provider":{"context_length":272000,"max_completion_tokens":128000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-5.4-image-2-20260421","alias_ids":null,"forwarded_model_id":null},{"id":"ling-2.6-flash","name":"inclusionAI: Ling-2.6-flash","created":1776795886,"description":"Ling-2.6-flash is an instant (instruct) model from inclusionAI with 104B total parameters and 7.4B active parameters, designed for real-world agents that require fast responses, strong execution, and high token efficiency....","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":3.351600000000001e-08,"completion":1.00548e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":6.384000000000001e-09,"input_cache_write":0.0,"max_prompt_cost":0.008786018304000002,"max_completion_cost":0.003294756864,"max_cost":0.010982522880000001},"sats_pricing":{"prompt":4.161977622354177e-05,"completion":0.00012485932867062528,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":7.927576423531765e-06,"input_cache_write":0.0,"max_prompt_cost":10.910374618344134,"max_completion_cost":4.091390481879049,"max_cost":13.637968272930166},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":32768,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"inclusionai/ling-2.6-flash-20260421","alias_ids":null,"forwarded_model_id":null},{"id":"claude-opus-latest","name":"Anthropic: Claude Opus Latest","created":1776795361,"description":"This model always redirects to the latest model in the Claude Opus family.","context_length":1000000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Router","instruct_type":null},"pricing":{"prompt":2.09475e-06,"completion":1.0473750000000001e-05,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":1.995e-07,"input_cache_write":2.49375e-06,"max_prompt_cost":2.0947500000000003,"max_completion_cost":1.34064,"max_cost":3.167262},"sats_pricing":{"prompt":0.0026012360139713604,"completion":0.013006180069856802,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":0.0002477367632353677,"input_cache_write":0.0030967095404420956,"max_prompt_cost":2601.2360139713605,"max_completion_cost":1664.7910489416706,"max_cost":3933.0688531246965},"per_request_limits":null,"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"~anthropic/claude-opus-latest","alias_ids":null,"forwarded_model_id":null},{"id":"qianfan-ocr-fast","name":"Qianfan-OCR-Fast","created":1776707472,"description":"PPQ.AI model","context_length":65536,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Unknown","instruct_type":null},"pricing":{"prompt":1.25685e-07,"completion":4.189500000000001e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.00823689216,"max_completion_cost":0.027456307200000005,"max_cost":0.027456307200000005},"sats_pricing":{"prompt":0.00015607416083828163,"completion":0.0005202472027942722,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":10.228476204697625,"max_completion_cost":34.09492068232542,"max_cost":34.09492068232542},"per_request_limits":null,"top_provider":null,"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":null,"alias_ids":null,"forwarded_model_id":null},{"id":"kimi-k2.6","name":"MoonshotAI: Kimi K2.6","created":1776699402,"description":"Kimi K2.6 is Moonshot AI's next-generation multimodal model, designed for long-horizon coding, coding-driven UI/UX generation, and multi-agent orchestration. It handles complex end-to-end coding tasks across Python, Rust, and Go, and...","context_length":262142,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":3.1002300000000005e-07,"completion":1.4621355e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":5.5860000000000005e-08,"input_cache_write":0.0,"max_prompt_cost":0.08127004926600001,"max_completion_cost":0.383287124241,"max_cost":0.383287124241},"sats_pricing":{"prompt":0.0003849829300677614,"completion":0.0018156627377520094,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":6.936629370590294e-05,"input_cache_write":0.0,"max_prompt_cost":100.9201952538231,"max_completion_cost":475.96146139978725,"max_cost":475.96146139978725},"per_request_limits":null,"top_provider":{"context_length":262142,"max_completion_tokens":262142,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"moonshotai/kimi-k2.6-20260420","alias_ids":null,"forwarded_model_id":null},{"id":"claude-opus-4.6-fast","name":"Anthropic: Claude Opus 4.6 (Fast)","created":1775592472,"description":"Fast-mode variant of [Opus 4.6](/anthropic/claude-opus-4.6) - identical capabilities with higher output speed at premium 6x pricing.\n\nLearn more in Anthropic's docs: https://platform.claude.com/docs/en/build-with-claude/fast-mode","context_length":1000000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":1.25685e-05,"completion":6.28425e-05,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":1.1970000000000001e-06,"input_cache_write":1.49625e-05,"max_prompt_cost":12.5685,"max_completion_cost":8.04384,"max_cost":19.003572},"sats_pricing":{"prompt":0.015607416083828162,"completion":0.07803708041914081,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":0.001486420579412206,"input_cache_write":0.01858025724265257,"max_prompt_cost":15607.416083828162,"max_completion_cost":9988.746293650023,"max_cost":23598.413118748176},"per_request_limits":null,"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"anthropic/claude-4.6-opus-fast-20260407","alias_ids":null,"forwarded_model_id":null},{"id":"glm-5.1","name":"Z.ai: GLM 5.1","created":1775578025,"description":"GLM-5.1 delivers a major leap in coding capability, with particularly significant gains in handling long-horizon tasks. Unlike previous models built around minute-level interactions, GLM-5.1 can work independently and continuously on...","context_length":202752,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":4.3989750000000005e-07,"completion":1.466325e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":2.09475e-07,"input_cache_write":0.0,"max_prompt_cost":0.08919009792000002,"max_completion_cost":0.096095608875,"max_cost":0.1564570241325},"sats_pricing":{"prompt":0.0005462595629339857,"completion":0.0018208652097799524,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.000260123601397136,"input_cache_write":0.0,"max_prompt_cost":110.75521890399148,"max_completion_cost":119.33040152292917,"max_cost":194.2864999700419},"per_request_limits":null,"top_provider":{"context_length":202752,"max_completion_tokens":65535,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"z-ai/glm-5.1-20260406","alias_ids":null,"forwarded_model_id":null},{"id":"gemma-4-26b-a4b-it","name":"Google: Gemma 4 26B A4B ","created":1775227989,"description":"Gemma 4 26B A4B IT is an instruction-tuned Mixture-of-Experts (MoE) model from Google DeepMind. Despite 25.2B total parameters, only 3.8B activate per token during inference — delivering near-31B quality at...","context_length":262144,"architecture":{"modality":"text+image+video->text","input_modalities":["image","text","video"],"output_modalities":["text"],"tokenizer":"Gemma","instruct_type":null},"pricing":{"prompt":2.5137e-08,"completion":1.382535e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.006589513728,"max_completion_cost":0.036242325504,"max_cost":0.036242325504},"sats_pricing":{"prompt":3.121483216765632e-05,"completion":0.00017168157692210977,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":8.182780963758098,"max_completion_cost":45.005295300669545,"max_cost":45.005295300669545},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"google/gemma-4-26b-a4b-it-20260403","alias_ids":null,"forwarded_model_id":null},{"id":"gemma-4-31b-it","name":"Google: Gemma 4 31B","created":1775148486,"description":"Gemma 4 31B Instruct is Google DeepMind's 30.7B dense multimodal model supporting text and image input with text output. Features a 256K token context window, configurable thinking/reasoning mode, native function...","context_length":262144,"architecture":{"modality":"text+image+video->text","input_modalities":["image","text","video"],"output_modalities":["text"],"tokenizer":"Gemma","instruct_type":null},"pricing":{"prompt":5.446350000000001e-08,"completion":1.5920100000000002e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.014277279744000003,"max_completion_cost":0.0026083491840000004,"max_cost":0.015993298944000003},"sats_pricing":{"prompt":6.763213636325538e-05,"completion":0.0001976939370618234,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":17.72935875480922,"max_completion_cost":3.2390174648209147,"max_cost":19.860291297454555},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"google/gemma-4-31b-it-20260402","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3.6-plus","name":"Qwen: Qwen3.6 Plus","created":1775133557,"description":"Qwen 3.6 Plus builds on a hybrid architecture that combines efficient linear attention with sparse mixture-of-experts routing, enabling strong scalability and high-performance inference. Compared to the 3.5 series, it delivers...","context_length":1000000,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":1.3615875000000001e-07,"completion":8.169525e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":1.6209375e-07,"max_prompt_cost":0.13615875000000002,"max_completion_cost":0.05353979904,"max_cost":0.18077524920000002},"sats_pricing":{"prompt":0.00016908034090813844,"completion":0.0010144820454488306,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0002012861201287362,"max_prompt_cost":169.08034090813845,"max_completion_cost":66.48509533053456,"max_cost":224.48458701691723},"per_request_limits":null,"top_provider":{"context_length":1000000,"max_completion_tokens":65536,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3.6-plus-04-02","alias_ids":null,"forwarded_model_id":null},{"id":"glm-5v-turbo","name":"Z.ai: GLM 5V Turbo","created":1775061458,"description":"GLM-5V-Turbo is Z.ai’s first native multimodal agent foundation model, built for vision-based coding and agent-driven tasks. It natively handles image, video, and text inputs, excels at long-horizon planning, complex coding,...","context_length":202752,"architecture":{"modality":"text+image+video->text","input_modalities":["image","text","video"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":5.0274e-07,"completion":1.6758000000000003e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":9.575999999999999e-08,"input_cache_write":0.0,"max_prompt_cost":0.10193154048,"max_completion_cost":0.21965045760000004,"max_cost":0.2556868608},"sats_pricing":{"prompt":0.0006242966433531265,"completion":0.002080988811177089,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.00011891364635297646,"input_cache_write":0.0,"max_prompt_cost":126.57739303313309,"max_completion_cost":272.7593654586034,"max_cost":317.50894885415545},"per_request_limits":null,"top_provider":{"context_length":202752,"max_completion_tokens":131072,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"z-ai/glm-5v-turbo-20260401","alias_ids":null,"forwarded_model_id":null},{"id":"trinity-large-thinking","name":"Arcee AI: Trinity Large Thinking","created":1775058318,"description":"Trinity Large Thinking is a powerful open source reasoning model from the team at Arcee AI. It shows strong performance in PinchBench, agentic workloads, and reasoning tasks. Launch video: https://youtu.be/Gc82AXLa0Rg?si=4RLn6WBz33qT--B7","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":9.216900000000001e-08,"completion":3.561075e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":2.3939999999999998e-08,"input_cache_write":0.0,"max_prompt_cost":0.024161550336000003,"max_completion_cost":0.09335144448,"max_cost":0.09335144448},"sats_pricing":{"prompt":0.00011445438461473986,"completion":0.00044221012237513125,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":2.9728411588244116e-05,"input_cache_write":0.0,"max_prompt_cost":30.003530200446367,"max_completion_cost":115.9227303199064,"max_cost":115.9227303199064},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":262144,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"arcee-ai/trinity-large-thinking","alias_ids":null,"forwarded_model_id":null},{"id":"grok-4.20-multi-agent","name":"xAI: Grok 4.20 Multi-Agent","created":1774979158,"description":"Grok 4.20 Multi-Agent is a variant of xAI’s Grok 4.20 designed for collaborative, agent-based workflows. Multiple agents operate in parallel to conduct deep research, coordinate tool use, and synthesize information...","context_length":2000000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Grok","instruct_type":null},"pricing":{"prompt":8.379000000000002e-07,"completion":2.5137e-06,"request":0.0,"image":0.0,"web_search":0.0019950000000000002,"internal_reasoning":0.0,"input_cache_read":7.98e-08,"input_cache_write":0.0,"max_prompt_cost":1.6758000000000004,"max_completion_cost":5.0274,"max_cost":5.0274},"sats_pricing":{"prompt":0.0010404944055885444,"completion":0.0031214832167656323,"request":0.001,"image":0.0,"web_search":2.4773676323536766,"internal_reasoning":0.0,"input_cache_read":9.909470529414706e-05,"input_cache_write":0.0,"max_prompt_cost":2080.988811177089,"max_completion_cost":6242.966433531265,"max_cost":6242.966433531265},"per_request_limits":null,"top_provider":{"context_length":2000000,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"x-ai/grok-4.20-multi-agent-20260309","alias_ids":null,"forwarded_model_id":null},{"id":"lyria-3-pro-preview","name":"Lyria 3 Pro Preview","created":1774907286,"description":"PPQ.AI model","context_length":1048576,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Unknown","instruct_type":null},"pricing":{"prompt":0.0,"completion":0.0,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0,"max_completion_cost":0.0,"max_cost":0.0},"sats_pricing":{"prompt":0.0,"completion":0.0,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0,"max_completion_cost":0.0,"max_cost":0.001},"per_request_limits":null,"top_provider":null,"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":null,"alias_ids":null,"forwarded_model_id":null},{"id":"lyria-3-clip-preview","name":"Lyria 3 Clip Preview","created":1774907255,"description":"PPQ.AI model","context_length":1048576,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Unknown","instruct_type":null},"pricing":{"prompt":0.0,"completion":0.0,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0,"max_completion_cost":0.0,"max_cost":0.0},"sats_pricing":{"prompt":0.0,"completion":0.0,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0,"max_completion_cost":0.0,"max_cost":0.001},"per_request_limits":null,"top_provider":null,"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":null,"alias_ids":null,"forwarded_model_id":null},{"id":"kat-coder-pro-v2","name":"Kwaipilot: KAT-Coder-Pro V2","created":1774649310,"description":"KAT-Coder-Pro V2 is the latest high-performance model in KwaiKAT’s KAT-Coder series, designed for complex enterprise-grade software engineering and SaaS integration. It builds on the agentic coding strengths of earlier versions,...","context_length":256000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":1.25685e-07,"completion":5.0274e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":2.3939999999999998e-08,"input_cache_write":0.0,"max_prompt_cost":0.03217536,"max_completion_cost":0.040219200000000004,"max_cost":0.06233976000000001},"sats_pricing":{"prompt":0.00015607416083828163,"completion":0.0006242966433531265,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":2.9728411588244116e-05,"input_cache_write":0.0,"max_prompt_cost":39.9549851746001,"max_completion_cost":49.94373146825012,"max_cost":77.41278377578769},"per_request_limits":null,"top_provider":{"context_length":256000,"max_completion_tokens":80000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"kwaipilot/kat-coder-pro-v2-20260327","alias_ids":null,"forwarded_model_id":null},{"id":"reka-edge","name":"Reka Edge","created":1774026965,"description":"Reka Edge is an extremely efficient 7B multimodal vision-language model that accepts image/video+text inputs and generates text outputs. This model is optimized specifically to deliver industry-leading performance in image understanding,...","context_length":16384,"architecture":{"modality":"text+image+video->text","input_modalities":["image","text","video"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":4.1895000000000006e-08,"completion":4.1895000000000006e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0006864076800000001,"max_completion_cost":0.0006864076800000001,"max_cost":0.0006864076800000001},"sats_pricing":{"prompt":5.202472027942721e-05,"completion":5.202472027942721e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.8523730170581354,"max_completion_cost":0.8523730170581354,"max_cost":0.8523730170581354},"per_request_limits":null,"top_provider":{"context_length":16384,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"rekaai/reka-edge-2603","alias_ids":null,"forwarded_model_id":null},{"id":"mimo-v2-omni","name":"Xiaomi: MiMo-V2-Omni","created":1773863703,"description":"MiMo-V2-Omni is a frontier omni-modal model that natively processes image, video, and audio inputs within a unified architecture. It combines strong multimodal perception with agentic capability - visual grounding, multi-step...","context_length":262144,"architecture":{"modality":"text+image+audio+video->text","input_modalities":["text","audio","image","video"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":1.6758000000000002e-07,"completion":8.379000000000002e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":3.1920000000000004e-08,"input_cache_write":0.0,"max_prompt_cost":0.043930091520000006,"max_completion_cost":0.05491261440000001,"max_cost":0.08786018304000001},"sats_pricing":{"prompt":0.00020809888111770884,"completion":0.0010404944055885444,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":3.9637882117658825e-05,"input_cache_write":0.0,"max_prompt_cost":54.551873091720665,"max_completion_cost":68.18984136465085,"max_cost":109.10374618344133},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":65536,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"xiaomi/mimo-v2-omni-20260318","alias_ids":null,"forwarded_model_id":null},{"id":"mimo-v2-pro","name":"Xiaomi: MiMo-V2-Pro","created":1773863643,"description":"MiMo-V2-Pro is Xiaomi's flagship foundation model, featuring over 1T total parameters and a 1M context length, deeply optimized for agentic scenarios. It is highly adaptable to general agent frameworks like...","context_length":1048576,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":4.189500000000001e-07,"completion":1.25685e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":7.98e-08,"input_cache_write":0.0,"max_prompt_cost":0.4393009152000001,"max_completion_cost":0.1647378432,"max_cost":0.5491261440000001},"sats_pricing":{"prompt":0.0005202472027942722,"completion":0.0015607416083828162,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":9.909470529414706e-05,"input_cache_write":0.0,"max_prompt_cost":545.5187309172068,"max_completion_cost":204.56952409395248,"max_cost":681.8984136465084},"per_request_limits":null,"top_provider":{"context_length":1048576,"max_completion_tokens":131072,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"xiaomi/mimo-v2-pro-20260318","alias_ids":null,"forwarded_model_id":null},{"id":"minimax-m2.7","name":"MiniMax: MiniMax M2.7","created":1773836697,"description":"MiniMax-M2.7 is a next-generation large language model designed for autonomous, real-world productivity and continuous improvement. Built to actively participate in its own evolution, M2.7 integrates advanced agentic capabilities through multi-agent...","context_length":196608,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":1.25685e-07,"completion":5.0274e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":2.3541000000000002e-08,"input_cache_write":0.0,"max_prompt_cost":0.02471067648,"max_completion_cost":0.09884270592,"max_cost":0.09884270592},"sats_pricing":{"prompt":0.00015607416083828163,"completion":0.0006242966433531265,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":2.9232938061773384e-05,"input_cache_write":0.0,"max_prompt_cost":30.68542861409287,"max_completion_cost":122.74171445637148,"max_cost":122.74171445637148},"per_request_limits":null,"top_provider":{"context_length":196608,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"minimax/minimax-m2.7-20260318","alias_ids":null,"forwarded_model_id":null},{"id":"mistral-small-2603","name":"Mistral: Mistral Small 4","created":1773695685,"description":"Mistral Small 4 is the next major release in the Mistral Small family, unifying the capabilities of several flagship Mistral models into a single system. It combines strong reasoning from...","context_length":262144,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":6.28425e-08,"completion":2.5137e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":5.9849999999999995e-09,"input_cache_write":0.0,"max_prompt_cost":0.01647378432,"max_completion_cost":0.06589513728,"max_cost":0.06589513728},"sats_pricing":{"prompt":7.803708041914081e-05,"completion":0.00031214832167656326,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":7.432102897061029e-06,"input_cache_write":0.0,"max_prompt_cost":20.45695240939525,"max_completion_cost":81.827809637581,"max_cost":81.827809637581},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"mistralai/mistral-small-2603","alias_ids":null,"forwarded_model_id":null},{"id":"glm-5-turbo","name":"Z.ai: GLM 5 Turbo","created":1773583573,"description":"GLM-5 Turbo is a new model from Z.ai designed for fast inference and strong performance in agent-driven environments such as OpenClaw scenarios. It is deeply optimized for real-world agent workflows...","context_length":202752,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":5.0274e-07,"completion":1.6758000000000003e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":9.575999999999999e-08,"input_cache_write":0.0,"max_prompt_cost":0.10193154048,"max_completion_cost":0.21965045760000004,"max_cost":0.2556868608},"sats_pricing":{"prompt":0.0006242966433531265,"completion":0.002080988811177089,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.00011891364635297646,"input_cache_write":0.0,"max_prompt_cost":126.57739303313309,"max_completion_cost":272.7593654586034,"max_cost":317.50894885415545},"per_request_limits":null,"top_provider":{"context_length":202752,"max_completion_tokens":131072,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"z-ai/glm-5-turbo-20260315","alias_ids":null,"forwarded_model_id":null},{"id":"nemotron-3-super-120b-a12b","name":"NVIDIA: Nemotron 3 Super","created":1773245239,"description":"NVIDIA Nemotron 3 Super is a 120B-parameter open hybrid MoE model, activating just 12B parameters for maximum compute efficiency and accuracy in complex multi-agent applications. Built on a hybrid Mamba-Transformer...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":3.7705500000000006e-08,"completion":1.885275e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.009884270592000002,"max_completion_cost":0.04942135296,"max_cost":0.04942135296},"sats_pricing":{"prompt":4.6822248251484494e-05,"completion":0.00023411124125742241,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":12.274171445637151,"max_completion_cost":61.37085722818574,"max_cost":61.37085722818574},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"nvidia/nemotron-3-super-120b-a12b-20230311","alias_ids":null,"forwarded_model_id":null},{"id":"seed-2.0-lite","name":"ByteDance Seed: Seed-2.0-Lite","created":1773157231,"description":"Seed-2.0-Lite is a versatile, cost‑efficient enterprise workhorse that delivers strong multimodal and agent capabilities while offering noticeably lower latency, making it a practical default choice for most production workloads across...","context_length":262144,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":1.0473750000000002e-07,"completion":8.379000000000002e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.027456307200000005,"max_completion_cost":0.10982522880000002,"max_cost":0.12355338240000002},"sats_pricing":{"prompt":0.00013006180069856805,"completion":0.0010404944055885444,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":34.09492068232542,"max_completion_cost":136.3796827293017,"max_cost":153.42714307046438},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":131072,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"bytedance-seed/seed-2.0-lite-20260309","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3.5-9b","name":"Qwen: Qwen3.5-9B","created":1773152396,"description":"Qwen3.5-9B is a multimodal foundation model from the Qwen3.5 family, designed to deliver strong reasoning, coding, and visual understanding in an efficient 9B-parameter architecture. It uses a unified vision-language design...","context_length":262144,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":4.1895000000000006e-08,"completion":6.28425e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.010982522880000001,"max_completion_cost":0.01647378432,"max_cost":0.01647378432},"sats_pricing":{"prompt":5.202472027942721e-05,"completion":7.803708041914081e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":13.637968272930166,"max_completion_cost":20.45695240939525,"max_cost":20.45695240939525},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3.5-9b-20260310","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-5.4-pro","name":"OpenAI: GPT-5.4 Pro","created":1772734366,"description":"GPT-5.4 Pro is OpenAI's most advanced model, building on GPT-5.4's unified architecture with enhanced reasoning capabilities for complex, high-stakes tasks. It features a 1M+ token context window (922K input, 128K...","context_length":1050000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":1.25685e-05,"completion":7.541100000000001e-05,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":13.196925,"max_completion_cost":9.652608000000003,"max_cost":21.240765000000003},"sats_pricing":{"prompt":0.015607416083828162,"completion":0.09364449650296898,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":16387.78688801957,"max_completion_cost":11986.49555238003,"max_cost":26376.533181669598},"per_request_limits":null,"top_provider":{"context_length":1050000,"max_completion_tokens":128000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-5.4-pro-20260305","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-5.4","name":"OpenAI: GPT-5.4","created":1772734352,"description":"GPT-5.4 is OpenAI’s latest frontier model, unifying the Codex and GPT lines into a single system. It features a 1M+ token context window (922K input, 128K output) with support for...","context_length":1050000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":1.047375e-06,"completion":6.28425e-06,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":9.975e-08,"input_cache_write":0.0,"max_prompt_cost":1.09974375,"max_completion_cost":0.804384,"max_cost":1.77006375},"sats_pricing":{"prompt":0.0013006180069856802,"completion":0.007803708041914081,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":0.00012386838161768384,"input_cache_write":0.0,"max_prompt_cost":1365.6489073349642,"max_completion_cost":998.8746293650023,"max_cost":2198.0444318057994},"per_request_limits":null,"top_provider":{"context_length":1050000,"max_completion_tokens":128000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-5.4-20260305","alias_ids":null,"forwarded_model_id":null},{"id":"mercury-2","name":"Inception: Mercury 2","created":1772636275,"description":"Mercury 2 is an extremely fast reasoning LLM, and the first reasoning diffusion LLM (dLLM). Instead of generating tokens sequentially, Mercury 2 produces and refines multiple tokens in parallel, achieving...","context_length":128000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":1.0473750000000002e-07,"completion":3.142125e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":9.975e-09,"input_cache_write":0.0,"max_prompt_cost":0.013406400000000002,"max_completion_cost":0.015710625000000002,"max_cost":0.023880150000000003},"sats_pricing":{"prompt":0.00013006180069856805,"completion":0.00039018540209570404,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.2386838161768382e-05,"input_cache_write":0.0,"max_prompt_cost":16.64791048941671,"max_completion_cost":19.509270104785205,"max_cost":29.65409055927351},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":50000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"inception/mercury-2-20260304","alias_ids":null,"forwarded_model_id":null},{"id":"gemini-3.1-flash-lite-preview","name":"Google: Gemini 3.1 Flash Lite Preview","created":1772512673,"description":"Gemini 3.1 Flash Lite Preview is Google's high-efficiency model optimized for high-volume use cases. It outperforms Gemini 2.5 Flash Lite on overall quality and approaches Gemini 2.5 Flash performance across...","context_length":1048576,"architecture":{"modality":"text+image+file+audio+video->text","input_modalities":["text","image","video","file","audio"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":6.9825e-08,"completion":4.189500000000001e-07,"request":0.0,"image":9.975e-08,"web_search":0.005586000000000001,"internal_reasoning":5.985000000000001e-07,"input_cache_read":9.975e-09,"input_cache_write":3.325e-08,"max_prompt_cost":0.0732168192,"max_completion_cost":0.027456307200000005,"max_cost":0.0960970752},"sats_pricing":{"prompt":8.670786713237869e-05,"completion":0.0005202472027942722,"request":0.001,"image":0.00012386838161768384,"web_search":6.936629370590294,"internal_reasoning":0.000743210289706103,"input_cache_read":1.2386838161768382e-05,"input_cache_write":4.1289460539227946e-05,"max_prompt_cost":90.91978848620111,"max_completion_cost":34.09492068232542,"max_cost":119.33222238813896},"per_request_limits":null,"top_provider":{"context_length":1048576,"max_completion_tokens":65536,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"google/gemini-3.1-flash-lite-preview-20260303","alias_ids":null,"forwarded_model_id":null},{"id":"seed-2.0-mini","name":"ByteDance Seed: Seed-2.0-Mini","created":1772131107,"description":"Seed-2.0-mini targets latency-sensitive, high-concurrency, and cost-sensitive scenarios, emphasizing fast response and flexible inference deployment. It delivers performance comparable to ByteDance-Seed-1.6, supports 256k context, four reasoning effort modes (minimal/low/medium/high), multimodal understanding,...","context_length":262144,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":4.1895000000000006e-08,"completion":1.6758000000000002e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.010982522880000001,"max_completion_cost":0.021965045760000003,"max_cost":0.027456307200000002},"sats_pricing":{"prompt":5.202472027942721e-05,"completion":0.00020809888111770884,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":13.637968272930166,"max_completion_cost":27.275936545860333,"max_cost":34.094920682325416},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":131072,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"bytedance-seed/seed-2.0-mini-20260224","alias_ids":null,"forwarded_model_id":null},{"id":"gemini-3.1-flash-image-preview","name":"Google: Nano Banana 2 (Gemini 3.1 Flash Image Preview)","created":1772119558,"description":"Gemini 3.1 Flash Image Preview, a.k.a. \"Nano Banana 2,\" is Google’s latest state of the art image generation and editing model, delivering Pro-level visual quality at Flash speed. It combines...","context_length":65536,"architecture":{"modality":"text+image->text+image","input_modalities":["image","text"],"output_modalities":["image","text"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":1.3965e-07,"completion":8.379000000000002e-07,"request":0.0,"image":0.0,"web_search":0.005586000000000001,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0091521024,"max_completion_cost":0.05491261440000001,"max_cost":0.05491261440000001},"sats_pricing":{"prompt":0.00017341573426475737,"completion":0.0010404944055885444,"request":0.001,"image":0.0,"web_search":6.936629370590294,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":11.36497356077514,"max_completion_cost":68.18984136465085,"max_cost":68.18984136465085},"per_request_limits":null,"top_provider":{"context_length":65536,"max_completion_tokens":65536,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"google/gemini-3.1-flash-image-preview-20260226","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3.5-35b-a3b","name":"Qwen: Qwen3.5-35B-A3B","created":1772053822,"description":"The Qwen3.5 Series 35B-A3B is a native vision-language model designed with a hybrid architecture that integrates linear attention mechanisms and a sparse mixture-of-experts model, achieving higher inference efficiency. Its overall...","context_length":262144,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":6.807937500000001e-08,"completion":5.446350000000001e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.995e-08,"input_cache_write":0.0,"max_prompt_cost":0.017846599680000002,"max_completion_cost":0.14277279744000002,"max_cost":0.14277279744000002},"sats_pricing":{"prompt":8.454017045406922e-05,"completion":0.0006763213636325538,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":2.4773676323536764e-05,"input_cache_write":0.0,"max_prompt_cost":22.161698443511522,"max_completion_cost":177.29358754809218,"max_cost":177.29358754809218},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":262144,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3.5-35b-a3b-20260224","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3.5-27b","name":"Qwen: Qwen3.5-27B","created":1772053810,"description":"The Qwen3.5 27B native vision-language Dense model incorporates a linear attention mechanism, delivering fast response times while balancing inference speed and performance. Its overall capabilities are comparable to those of...","context_length":262144,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":8.169525e-08,"completion":6.53562e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.021415919616,"max_completion_cost":0.042831839232,"max_cost":0.058893778944},"sats_pricing":{"prompt":0.00010144820454488305,"completion":0.0008115856363590644,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":26.59403813221382,"max_completion_cost":53.18807626442764,"max_cost":73.133604863588},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":65536,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3.5-27b-20260224","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3.5-122b-a10b","name":"Qwen: Qwen3.5-122B-A10B","created":1772053789,"description":"The Qwen3.5 122B-A10B native vision-language model is built on a hybrid architecture that integrates a linear attention mechanism with a sparse mixture-of-experts model, achieving higher inference efficiency. In terms of...","context_length":262144,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":1.0892700000000002e-07,"completion":8.714160000000002e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.028554559488000005,"max_completion_cost":0.05710911897600001,"max_cost":0.07852503859200001},"sats_pricing":{"prompt":0.00013526427272651077,"completion":0.0010821141818120861,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":35.45871750961844,"max_completion_cost":70.91743501923688,"max_cost":97.51147315145069},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":65536,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3.5-122b-a10b-20260224","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3.5-flash-02-23","name":"Qwen: Qwen3.5-Flash","created":1772053776,"description":"The Qwen3.5 native vision-language Flash models are built on a hybrid architecture that integrates a linear attention mechanism with a sparse mixture-of-experts model, achieving higher inference efficiency. Compared to the...","context_length":1000000,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":2.7231750000000005e-08,"completion":1.0892700000000002e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":3.241875e-08,"max_prompt_cost":0.027231750000000006,"max_completion_cost":0.007138639872000001,"max_cost":0.03258572990400001},"sats_pricing":{"prompt":3.381606818162769e-05,"completion":0.00013526427272651077,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":4.0257224025747244e-05,"max_prompt_cost":33.81606818162769,"max_completion_cost":8.86467937740461,"max_cost":40.46457771468115},"per_request_limits":null,"top_provider":{"context_length":1000000,"max_completion_tokens":65536,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3.5-flash-20260224","alias_ids":null,"forwarded_model_id":null},{"id":"lfm-2-24b-a2b","name":"LiquidAI: LFM2-24B-A2B","created":1772048711,"description":"LFM2-24B-A2B is the largest model in the LFM2 family of hybrid architectures designed for efficient on-device deployment. Built as a 24B parameter Mixture-of-Experts model with only 2B active parameters per...","context_length":32768,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":1.25685e-08,"completion":5.0274e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.000411844608,"max_completion_cost":0.001647378432,"max_cost":0.001647378432},"sats_pricing":{"prompt":1.560741608382816e-05,"completion":6.242966433531264e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.5114238102348811,"max_completion_cost":2.0456952409395246,"max_cost":2.0456952409395246},"per_request_limits":null,"top_provider":{"context_length":32768,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"liquid/lfm-2-24b-a2b-20260224","alias_ids":null,"forwarded_model_id":null},{"id":"gemini-3.1-pro-preview-customtools","name":"Google: Gemini 3.1 Pro Preview Custom Tools","created":1772045923,"description":"Gemini 3.1 Pro Preview Custom Tools is a variant of Gemini 3.1 Pro that improves tool selection behavior by preventing overuse of a general bash tool when more efficient third-party...","context_length":1048576,"architecture":{"modality":"text+image+file+audio+video->text","input_modalities":["text","audio","image","video","file"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":5.586e-07,"completion":3.3516000000000007e-06,"request":0.0,"image":7.98e-07,"web_search":0.005586000000000001,"internal_reasoning":4.7880000000000006e-06,"input_cache_read":7.98e-08,"input_cache_write":1.4962500000000002e-07,"max_prompt_cost":0.5857345536,"max_completion_cost":0.21965045760000004,"max_cost":0.7687766016},"sats_pricing":{"prompt":0.0006936629370590295,"completion":0.004161977622354178,"request":0.001,"image":0.0009909470529414707,"web_search":6.936629370590294,"internal_reasoning":0.005945682317648824,"input_cache_read":9.909470529414706e-05,"input_cache_write":0.00018580257242652574,"max_prompt_cost":727.3583078896089,"max_completion_cost":272.7593654586034,"max_cost":954.6577791051117},"per_request_limits":null,"top_provider":{"context_length":1048576,"max_completion_tokens":65536,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"google/gemini-3.1-pro-preview-customtools-20260219","alias_ids":null,"forwarded_model_id":null},{"id":"aion-2.0","name":"AionLabs: Aion-2.0","created":1771881306,"description":"Aion-2.0 is a variant of DeepSeek V3.2 optimized for immersive roleplaying and storytelling. It is particularly strong at introducing tension, crises, and conflict into stories, making narratives feel more engaging....","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":3.3516000000000004e-07,"completion":6.703200000000001e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":7.98e-08,"input_cache_write":0.0,"max_prompt_cost":0.043930091520000006,"max_completion_cost":0.021965045760000003,"max_cost":0.05491261440000001},"sats_pricing":{"prompt":0.0004161977622354177,"completion":0.0008323955244708353,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":9.909470529414706e-05,"input_cache_write":0.0,"max_prompt_cost":54.551873091720665,"max_completion_cost":27.275936545860333,"max_cost":68.18984136465085},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":32768,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"aion-labs/aion-2.0-20260223","alias_ids":null,"forwarded_model_id":null},{"id":"gemini-3.1-pro-preview","name":"Google: Gemini 3.1 Pro Preview","created":1771509627,"description":"Gemini 3.1 Pro Preview is Google’s frontier reasoning model, delivering enhanced software engineering performance, improved agentic reliability, and more efficient token usage across complex workflows. Building on the multimodal foundation...","context_length":1048576,"architecture":{"modality":"text+image+file+audio+video->text","input_modalities":["audio","file","image","text","video"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":5.586e-07,"completion":3.3516000000000007e-06,"request":0.0,"image":7.98e-07,"web_search":0.005586000000000001,"internal_reasoning":4.7880000000000006e-06,"input_cache_read":7.98e-08,"input_cache_write":1.4962500000000002e-07,"max_prompt_cost":0.5857345536,"max_completion_cost":0.21965045760000004,"max_cost":0.7687766016},"sats_pricing":{"prompt":0.0006936629370590295,"completion":0.004161977622354178,"request":0.001,"image":0.0009909470529414707,"web_search":6.936629370590294,"internal_reasoning":0.005945682317648824,"input_cache_read":9.909470529414706e-05,"input_cache_write":0.00018580257242652574,"max_prompt_cost":727.3583078896089,"max_completion_cost":272.7593654586034,"max_cost":954.6577791051117},"per_request_limits":null,"top_provider":{"context_length":1048576,"max_completion_tokens":65536,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"google/gemini-3.1-pro-preview-20260219","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3.5-plus-02-15","name":"Qwen: Qwen3.5 Plus 2026-02-15","created":1771229416,"description":"The Qwen3.5 native vision-language series Plus models are built on a hybrid architecture that integrates linear attention mechanisms with sparse mixture-of-experts models, achieving higher inference efficiency. In a variety of...","context_length":1000000,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":1.0892700000000002e-07,"completion":6.53562e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":1.29675e-07,"max_prompt_cost":0.10892700000000002,"max_completion_cost":0.042831839232,"max_cost":0.14462019936000003},"sats_pricing":{"prompt":0.00013526427272651077,"completion":0.0008115856363590644,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.00016102889610298898,"max_prompt_cost":135.26427272651077,"max_completion_cost":53.18807626442764,"max_cost":179.58766961353382},"per_request_limits":null,"top_provider":{"context_length":1000000,"max_completion_tokens":65536,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3.5-plus-20260216","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3.5-397b-a17b","name":"Qwen: Qwen3.5 397B A17B","created":1771223018,"description":"The Qwen3.5 series 397B-A17B native vision-language model is built on a hybrid architecture that integrates a linear attention mechanism with a sparse mixture-of-experts model, achieving higher inference efficiency. It delivers...","context_length":262144,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":1.633905e-07,"completion":9.80343e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":7.780500000000001e-08,"input_cache_write":0.0,"max_prompt_cost":0.042831839232,"max_completion_cost":0.064247758848,"max_cost":0.096371638272},"sats_pricing":{"prompt":0.0002028964090897661,"completion":0.0012173784545385967,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":9.66173376617934e-05,"input_cache_write":0.0,"max_prompt_cost":53.18807626442764,"max_completion_cost":79.78211439664148,"max_cost":119.6731715949622},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":65536,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3.5-397b-a17b-20260216","alias_ids":null,"forwarded_model_id":null},{"id":"minimax-m2.5","name":"MiniMax: MiniMax M2.5","created":1770908502,"description":"MiniMax-M2.5 is a SOTA large language model designed for real-world productivity. Trained in a diverse range of complex real-world digital working environments, M2.5 builds upon the coding expertise of M2.1...","context_length":196608,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":6.28425e-08,"completion":4.817925000000001e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.1969999999999999e-08,"input_cache_write":0.0,"max_prompt_cost":0.01235533824,"max_completion_cost":0.06314950656000001,"max_cost":0.06726795264000002},"sats_pricing":{"prompt":7.803708041914081e-05,"completion":0.0005982842832134129,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.4864205794122058e-05,"input_cache_write":0.0,"max_prompt_cost":15.342714307046435,"max_completion_cost":78.41831756934846,"max_cost":83.53255567169728},"per_request_limits":null,"top_provider":{"context_length":196608,"max_completion_tokens":131072,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"minimax/minimax-m2.5-20260211","alias_ids":null,"forwarded_model_id":null},{"id":"glm-5","name":"Z.ai: GLM 5","created":1770829182,"description":"GLM-5 is Z.ai’s flagship open-source foundation model engineered for complex systems design and long-horizon agent workflows. Built for expert developers, it delivers production-grade performance on large-scale programming tasks, rivaling leading...","context_length":202752,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":2.5137e-07,"completion":8.714160000000002e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":4.7879999999999996e-08,"input_cache_write":0.0,"max_prompt_cost":0.05096577024,"max_completion_cost":0.014277279744000003,"max_cost":0.061124603904000006},"sats_pricing":{"prompt":0.00031214832167656326,"completion":0.0010821141818120861,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":5.945682317648823e-05,"input_cache_write":0.0,"max_prompt_cost":63.288696516566546,"max_completion_cost":17.72935875480922,"max_cost":75.90381716902696},"per_request_limits":null,"top_provider":{"context_length":202752,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"z-ai/glm-5-20260211","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3-max-thinking","name":"Qwen: Qwen3 Max Thinking","created":1770671901,"description":"Qwen3-Max-Thinking is the flagship reasoning model in the Qwen3 series, designed for high-stakes cognitive tasks that require deep, multi-step reasoning. By significantly scaling model capacity and reinforcement learning compute, it...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":null},"pricing":{"prompt":3.26781e-07,"completion":1.633905e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.085663678464,"max_completion_cost":0.05353979904,"max_cost":0.12849551769599998},"sats_pricing":{"prompt":0.0004057928181795322,"completion":0.002028964090897661,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":106.37615252885529,"max_completion_cost":66.48509533053456,"max_cost":159.56422879328292},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":32768,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3-max-thinking-20260123","alias_ids":null,"forwarded_model_id":null},{"id":"claude-opus-4.6","name":"Anthropic: Claude Opus 4.6","created":1770219050,"description":"Opus 4.6 is Anthropic’s strongest model for coding and long-running professional tasks. It is built for agents that operate across entire workflows rather than single prompts, making it especially effective...","context_length":1000000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":2.09475e-06,"completion":1.0473750000000001e-05,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":1.995e-07,"input_cache_write":2.49375e-06,"max_prompt_cost":2.0947500000000003,"max_completion_cost":1.34064,"max_cost":3.167262},"sats_pricing":{"prompt":0.0026012360139713604,"completion":0.013006180069856802,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":0.0002477367632353677,"input_cache_write":0.0030967095404420956,"max_prompt_cost":2601.2360139713605,"max_completion_cost":1664.7910489416706,"max_cost":3933.0688531246965},"per_request_limits":null,"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"anthropic/claude-4.6-opus-20260205","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3-coder-next","name":"Qwen: Qwen3 Coder Next","created":1770164101,"description":"Qwen3-Coder-Next is an open-weight causal language model optimized for coding agents and local development workflows. It uses a sparse MoE design with 80B total parameters and only 3B activated per...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":null},"pricing":{"prompt":5.0274e-08,"completion":3.3516000000000004e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":2.7930000000000003e-08,"input_cache_write":0.0,"max_prompt_cost":0.013179027456,"max_completion_cost":0.08786018304000001,"max_cost":0.08786018304000001},"sats_pricing":{"prompt":6.242966433531264e-05,"completion":0.0004161977622354177,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":3.468314685295147e-05,"input_cache_write":0.0,"max_prompt_cost":16.365561927516197,"max_completion_cost":109.10374618344133,"max_cost":109.10374618344133},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":262144,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3-coder-next-2025-02-03","alias_ids":null,"forwarded_model_id":null},{"id":"step-3.5-flash","name":"StepFun: Step 3.5 Flash","created":1769728337,"description":"Step 3.5 Flash is StepFun's most capable open-source foundation model. Built on a sparse Mixture of Experts (MoE) architecture, it selectively activates only 11B of its 196B parameters per token....","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":4.1895000000000006e-08,"completion":1.25685e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.010982522880000001,"max_completion_cost":0.00823689216,"max_cost":0.016473784320000004},"sats_pricing":{"prompt":5.202472027942721e-05,"completion":0.00015607416083828163,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":13.637968272930166,"max_completion_cost":10.228476204697625,"max_cost":20.456952409395253},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":65536,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"stepfun/step-3.5-flash","alias_ids":null,"forwarded_model_id":null},{"id":"trinity-large-preview","name":"Arcee AI: Trinity Large Preview","created":1769552670,"description":"Trinity-Large-Preview is a frontier-scale open-weight language model from Arcee, built as a 400B-parameter sparse Mixture-of-Experts with 13B active parameters per token using 4-of-256 expert routing. It excels in creative writing,...","context_length":131000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":6.28425e-08,"completion":1.885275e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0082323675,"max_completion_cost":0.0246971025,"max_cost":0.0246971025},"sats_pricing":{"prompt":7.803708041914081e-05,"completion":0.00023411124125742241,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":10.222857534907446,"max_completion_cost":30.668572604722335,"max_cost":30.668572604722335},"per_request_limits":null,"top_provider":{"context_length":131000,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"arcee-ai/trinity-large-preview","alias_ids":null,"forwarded_model_id":null},{"id":"kimi-k2.5","name":"MoonshotAI: Kimi K2.5","created":1769487076,"description":"Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed...","context_length":262144,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":1.8433800000000002e-07,"completion":8.379000000000002e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":8.778000000000001e-08,"input_cache_write":0.0,"max_prompt_cost":0.048323100672000005,"max_completion_cost":0.05491177650000001,"max_cost":0.09115428634200001},"sats_pricing":{"prompt":0.00022890876922947973,"completion":0.0010404944055885444,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.00010900417582356178,"input_cache_write":0.0,"max_prompt_cost":60.00706040089273,"max_completion_cost":68.18880087024525,"max_cost":113.19432507968402},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":65535,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"moonshotai/kimi-k2.5-0127","alias_ids":null,"forwarded_model_id":null},{"id":"solar-pro-3","name":"Upstage: Solar Pro 3","created":1769481200,"description":"Solar Pro 3 is Upstage's powerful Mixture-of-Experts (MoE) language model. With 102B total parameters and 12B active parameters per forward pass, it delivers exceptional performance while maintaining computational efficiency. Optimized...","context_length":128000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":6.28425e-08,"completion":2.5137e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":5.9849999999999995e-09,"input_cache_write":0.0,"max_prompt_cost":0.00804384,"max_completion_cost":0.03217536,"max_cost":0.03217536},"sats_pricing":{"prompt":7.803708041914081e-05,"completion":0.00031214832167656326,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":7.432102897061029e-06,"input_cache_write":0.0,"max_prompt_cost":9.988746293650024,"max_completion_cost":39.9549851746001,"max_cost":39.9549851746001},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"upstage/solar-pro-3","alias_ids":null,"forwarded_model_id":null},{"id":"minimax-m2-her","name":"MiniMax: MiniMax M2-her","created":1769177239,"description":"MiniMax M2-her is a dialogue-first large language model built for immersive roleplay, character-driven chat, and expressive multi-turn conversations. Designed to stay consistent in tone and personality, it supports rich message...","context_length":65536,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":1.25685e-07,"completion":5.0274e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.1969999999999999e-08,"input_cache_write":0.0,"max_prompt_cost":0.00823689216,"max_completion_cost":0.00102961152,"max_cost":0.0090091008},"sats_pricing":{"prompt":0.00015607416083828163,"completion":0.0006242966433531265,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.4864205794122058e-05,"input_cache_write":0.0,"max_prompt_cost":10.228476204697625,"max_completion_cost":1.278559525587203,"max_cost":11.187395848888025},"per_request_limits":null,"top_provider":{"context_length":65536,"max_completion_tokens":2048,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"minimax/minimax-m2-her-20260123","alias_ids":null,"forwarded_model_id":null},{"id":"palmyra-x5","name":"Writer: Palmyra X5","created":1769003823,"description":"Palmyra X5 is Writer's most advanced model, purpose-built for building and scaling AI agents across the enterprise. It delivers industry-leading speed and efficiency on context windows up to 1 million...","context_length":1040000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":2.5137e-07,"completion":2.5137e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.2614248,"max_completion_cost":0.0205922304,"max_cost":0.27995780736000003},"sats_pricing":{"prompt":0.00031214832167656326,"completion":0.0031214832167656323,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":324.6342545436258,"max_completion_cost":25.57119051174406,"max_cost":347.64832600419544},"per_request_limits":null,"top_provider":{"context_length":1040000,"max_completion_tokens":8192,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"writer/palmyra-x5-20250428","alias_ids":null,"forwarded_model_id":null},{"id":"lfm-2.5-1.2b-thinking","name":"LFM2.5-1.2B-Thinking","created":1768927527,"description":"PPQ.AI model","context_length":32768,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Unknown","instruct_type":null},"pricing":{"prompt":1.25685e-07,"completion":4.189500000000001e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.00411844608,"max_completion_cost":0.013728153600000003,"max_cost":0.013728153600000003},"sats_pricing":{"prompt":0.00015607416083828163,"completion":0.0005202472027942722,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":5.114238102348812,"max_completion_cost":17.04746034116271,"max_cost":17.04746034116271},"per_request_limits":null,"top_provider":null,"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":null,"alias_ids":null,"forwarded_model_id":null},{"id":"lfm-2.5-1.2b-instruct","name":"LFM2.5-1.2B-Instruct","created":1768927521,"description":"PPQ.AI model","context_length":32768,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Unknown","instruct_type":null},"pricing":{"prompt":1.25685e-07,"completion":4.189500000000001e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.00411844608,"max_completion_cost":0.013728153600000003,"max_cost":0.013728153600000003},"sats_pricing":{"prompt":0.00015607416083828163,"completion":0.0005202472027942722,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":5.114238102348812,"max_completion_cost":17.04746034116271,"max_cost":17.04746034116271},"per_request_limits":null,"top_provider":null,"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":null,"alias_ids":null,"forwarded_model_id":null},{"id":"gpt-audio","name":"OpenAI: GPT Audio","created":1768862569,"description":"The gpt-audio model is OpenAI's first generally available audio model. The new snapshot features an upgraded decoder for more natural sounding voices and maintains better voice consistency. Audio is priced...","context_length":128000,"architecture":{"modality":"text+audio->text+audio","input_modalities":["text","audio"],"output_modalities":["text","audio"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":1.047375e-06,"completion":4.1895e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.13406400000000002,"max_completion_cost":0.068640768,"max_cost":0.18554457600000002},"sats_pricing":{"prompt":0.0013006180069856802,"completion":0.005202472027942721,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":166.47910489416708,"max_completion_cost":85.23730170581354,"max_cost":230.40708117352722},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-audio","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-audio-mini","name":"OpenAI: GPT Audio Mini","created":1768859419,"description":"A cost-efficient version of GPT Audio. The new snapshot features an upgraded decoder for more natural sounding voices and maintains better voice consistency. Input is priced at $0.60 per million...","context_length":128000,"architecture":{"modality":"text+audio->text+audio","input_modalities":["text","audio"],"output_modalities":["text","audio"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":2.5137e-07,"completion":1.00548e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.03217536,"max_completion_cost":0.01647378432,"max_cost":0.04453069824},"sats_pricing":{"prompt":0.00031214832167656326,"completion":0.001248593286706253,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":39.9549851746001,"max_completion_cost":20.45695240939525,"max_cost":55.297699481646525},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-audio-mini","alias_ids":null,"forwarded_model_id":null},{"id":"glm-4.7-flash","name":"Z.ai: GLM 4.7 Flash","created":1768833913,"description":"As a 30B-class SOTA model, GLM-4.7-Flash offers a new option that balances performance and efficiency. It is further optimized for agentic coding use cases, strengthening coding capabilities, long-horizon task planning,...","context_length":202752,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":2.5137e-08,"completion":1.6758000000000002e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":3.9900000000000005e-09,"input_cache_write":0.0,"max_prompt_cost":0.005096577024,"max_completion_cost":0.0027456307200000004,"max_cost":0.007430363136},"sats_pricing":{"prompt":3.121483216765632e-05,"completion":0.00020809888111770884,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":4.954735264707353e-06,"input_cache_write":0.0,"max_prompt_cost":6.328869651656654,"max_completion_cost":3.4094920682325416,"max_cost":9.226937909654314},"per_request_limits":null,"top_provider":{"context_length":202752,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"z-ai/glm-4.7-flash-20260119","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-5.2-codex","name":"OpenAI: GPT-5.2-Codex","created":1768409315,"description":"GPT-5.2-Codex is an upgraded version of GPT-5.1-Codex optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks....","context_length":400000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":7.331625e-07,"completion":5.8653e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":6.9825e-08,"input_cache_write":0.0,"max_prompt_cost":0.293265,"max_completion_cost":0.7507584,"max_cost":0.9501786000000001},"sats_pricing":{"prompt":0.0009104326048899762,"completion":0.0072834608391198095,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":8.670786713237869e-05,"input_cache_write":0.0,"max_prompt_cost":364.1730419559904,"max_completion_cost":932.2829874073356,"max_cost":1179.920655937409},"per_request_limits":null,"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-5.2-codex-20260114","alias_ids":null,"forwarded_model_id":null},{"id":"olmo-3.1-32b-instruct","name":"AllenAI: Olmo 3.1 32B Instruct","created":1767728554,"description":"Olmo 3.1 32B Instruct is a large-scale, 32-billion-parameter instruction-tuned language model engineered for high-performance conversational AI, multi-turn dialogue, and practical instruction following. As part of the Olmo 3.1 family, this...","context_length":65536,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":8.379000000000001e-08,"completion":2.5137e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.005491261440000001,"max_completion_cost":0.00411844608,"max_cost":0.008236892160000002},"sats_pricing":{"prompt":0.00010404944055885442,"completion":0.00031214832167656326,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":6.818984136465083,"max_completion_cost":5.114238102348812,"max_cost":10.228476204697627},"per_request_limits":null,"top_provider":{"context_length":65536,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"allenai/olmo-3.1-32b-instruct-20251215","alias_ids":null,"forwarded_model_id":null},{"id":"seed-1.6-flash","name":"ByteDance Seed: Seed 1.6 Flash","created":1766505011,"description":"Seed 1.6 Flash is an ultra-fast multimodal deep thinking model by ByteDance Seed, supporting both text and visual understanding. It features a 256k context window and can generate outputs of...","context_length":262144,"architecture":{"modality":"text+image+video->text","input_modalities":["image","text","video"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":3.142125e-08,"completion":1.25685e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.00823689216,"max_completion_cost":0.00411844608,"max_cost":0.01132572672},"sats_pricing":{"prompt":3.901854020957041e-05,"completion":0.00015607416083828163,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":10.228476204697625,"max_completion_cost":5.114238102348812,"max_cost":14.064154781459234},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":32768,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"bytedance-seed/seed-1.6-flash-20250625","alias_ids":null,"forwarded_model_id":null},{"id":"seed-1.6","name":"ByteDance Seed: Seed 1.6","created":1766504997,"description":"Seed 1.6 is a general-purpose model released by the ByteDance Seed team. It incorporates multimodal capabilities and adaptive deep thinking with a 256K context window.","context_length":262144,"architecture":{"modality":"text+image+video->text","input_modalities":["image","text","video"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":1.0473750000000002e-07,"completion":8.379000000000002e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.027456307200000005,"max_completion_cost":0.027456307200000005,"max_cost":0.051480576000000014},"sats_pricing":{"prompt":0.00013006180069856805,"completion":0.0010404944055885444,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":34.09492068232542,"max_completion_cost":34.09492068232542,"max_cost":63.927976279360166},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":32768,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"bytedance-seed/seed-1.6-20250625","alias_ids":null,"forwarded_model_id":null},{"id":"minimax-m2.1","name":"MiniMax: MiniMax M2.1","created":1766454997,"description":"MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. With only 10 billion activated parameters, it delivers a major jump in real-world...","context_length":196608,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":1.214955e-07,"completion":3.980025e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.1969999999999999e-08,"input_cache_write":0.0,"max_prompt_cost":0.023886987264,"max_completion_cost":0.07825047552,"max_cost":0.07825047552},"sats_pricing":{"prompt":0.0001508716888103389,"completion":0.0004942348426545584,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.4864205794122058e-05,"input_cache_write":0.0,"max_prompt_cost":29.66258099362311,"max_completion_cost":97.17052394462743,"max_cost":97.17052394462743},"per_request_limits":null,"top_provider":{"context_length":196608,"max_completion_tokens":196608,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"minimax/minimax-m2.1","alias_ids":null,"forwarded_model_id":null},{"id":"glm-4.7","name":"Z.ai: GLM 4.7","created":1766378014,"description":"GLM-4.7 is Z.ai’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while...","context_length":202752,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":1.5920100000000002e-07,"completion":7.28973e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.032278321152,"max_completion_cost":0.147800733696,"max_cost":0.147800733696},"sats_pricing":{"prompt":0.0001976939370618234,"completion":0.0009052301328620334,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":40.082841127158815,"max_completion_cost":183.537219898043,"max_cost":183.537219898043},"per_request_limits":null,"top_provider":{"context_length":202752,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"z-ai/glm-4.7-20251222","alias_ids":null,"forwarded_model_id":null},{"id":"mimo-v2-flash","name":"Xiaomi: MiMo-V2-Flash","created":1765731308,"description":"MiMo-V2-Flash is an open-source foundation language model developed by Xiaomi. It is a Mixture-of-Experts model with 309B total parameters and 15B active parameters, adopting hybrid attention architecture. MiMo-V2-Flash supports a...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":3.7705500000000006e-08,"completion":1.214955e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.7955e-08,"input_cache_write":0.0,"max_prompt_cost":0.009884270592000002,"max_completion_cost":0.007962329088,"max_cost":0.015375532032},"sats_pricing":{"prompt":4.6822248251484494e-05,"completion":0.0001508716888103389,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":2.229630869118309e-05,"input_cache_write":0.0,"max_prompt_cost":12.274171445637151,"max_completion_cost":9.887526997874371,"max_cost":19.09315558210223},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":65536,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"xiaomi/mimo-v2-flash-20251210","alias_ids":null,"forwarded_model_id":null},{"id":"nemotron-3-nano-30b-a3b","name":"NVIDIA: Nemotron 3 Nano 30B A3B","created":1765731275,"description":"NVIDIA Nemotron 3 Nano 30B A3B is a small language MoE model with highest compute efficiency and accuracy for developers to build specialized agentic AI systems. The model is fully...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":2.0947500000000003e-08,"completion":8.379000000000001e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.005491261440000001,"max_completion_cost":0.019104120000000002,"max_cost":0.01981935144},"sats_pricing":{"prompt":2.6012360139713605e-05,"completion":0.00010404944055885442,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":6.818984136465083,"max_completion_cost":23.723272447418807,"max_cost":24.61143847202919},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":228000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"nvidia/nemotron-3-nano-30b-a3b","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-5.2-chat","name":"OpenAI: GPT-5.2 Chat","created":1765389783,"description":"GPT-5.2 Chat (AKA Instant) is the fast, lightweight member of the 5.2 family, optimized for low-latency chat while retaining strong general intelligence. It uses adaptive reasoning to selectively “think” on...","context_length":128000,"architecture":{"modality":"text+image+file->text","input_modalities":["file","image","text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":7.331625e-07,"completion":5.8653e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":6.9825e-08,"input_cache_write":0.0,"max_prompt_cost":0.0938448,"max_completion_cost":0.1876896,"max_cost":0.2580732},"sats_pricing":{"prompt":0.0009104326048899762,"completion":0.0072834608391198095,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":8.670786713237869e-05,"input_cache_write":0.0,"max_prompt_cost":116.53537342591694,"max_completion_cost":233.0707468518339,"max_cost":320.4722769212716},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":32000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-5.2-chat-20251211","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-5.2-pro","name":"OpenAI: GPT-5.2 Pro","created":1765389780,"description":"GPT-5.2 Pro is OpenAI’s most advanced model, offering major improvements in agentic coding and long context performance over GPT-5 Pro. It is optimized for complex tasks that require step-by-step reasoning,...","context_length":400000,"architecture":{"modality":"text+image+file->text","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":8.797950000000001e-06,"completion":7.038360000000001e-05,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":3.5191800000000004,"max_completion_cost":9.0091008,"max_cost":11.402143200000001},"sats_pricing":{"prompt":0.010925191258679714,"completion":0.08740153006943771,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":4370.076503471886,"max_completion_cost":11187.395848888027,"max_cost":14159.04787124891},"per_request_limits":null,"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-5.2-pro-20251211","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-5.2","name":"OpenAI: GPT-5.2","created":1765389775,"description":"GPT-5.2 is the latest frontier-grade model in the GPT-5 series, offering stronger agentic and long context perfomance compared to GPT-5.1. It uses adaptive reasoning to allocate computation dynamically, responding quickly...","context_length":400000,"architecture":{"modality":"text+image+file->text","input_modalities":["file","image","text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":7.331625e-07,"completion":5.8653e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":6.9825e-08,"input_cache_write":0.0,"max_prompt_cost":0.293265,"max_completion_cost":0.7507584,"max_cost":0.9501786000000001},"sats_pricing":{"prompt":0.0009104326048899762,"completion":0.0072834608391198095,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":8.670786713237869e-05,"input_cache_write":0.0,"max_prompt_cost":364.1730419559904,"max_completion_cost":932.2829874073356,"max_cost":1179.920655937409},"per_request_limits":null,"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-5.2-20251211","alias_ids":null,"forwarded_model_id":null},{"id":"devstral-2512","name":"Mistral: Devstral 2 2512","created":1765285419,"description":"Devstral 2 is a state-of-the-art open-source model by Mistral AI specializing in agentic coding. It is a 123B-parameter dense transformer model supporting a 256K context window. Devstral 2 supports exploring...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":1.6758000000000002e-07,"completion":8.379000000000002e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.5960000000000002e-08,"input_cache_write":0.0,"max_prompt_cost":0.043930091520000006,"max_completion_cost":0.21965045760000004,"max_cost":0.21965045760000004},"sats_pricing":{"prompt":0.00020809888111770884,"completion":0.0010404944055885444,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.9818941058829413e-05,"input_cache_write":0.0,"max_prompt_cost":54.551873091720665,"max_completion_cost":272.7593654586034,"max_cost":272.7593654586034},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"mistralai/devstral-2512","alias_ids":null,"forwarded_model_id":null},{"id":"relace-search","name":"Relace: Relace Search","created":1765213560,"description":"The relace-search model uses 4-12 `view_file` and `grep` tools in parallel to explore a codebase and return relevant files to the user request. In contrast to RAG, relace-search performs agentic...","context_length":256000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":4.189500000000001e-07,"completion":1.25685e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.10725120000000002,"max_completion_cost":0.16087680000000001,"max_cost":0.21450240000000004},"sats_pricing":{"prompt":0.0005202472027942722,"completion":0.0015607416083828162,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":133.18328391533367,"max_completion_cost":199.77492587300048,"max_cost":266.36656783066735},"per_request_limits":null,"top_provider":{"context_length":256000,"max_completion_tokens":128000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"relace/relace-search-20251208","alias_ids":null,"forwarded_model_id":null},{"id":"glm-4.6v","name":"Z.ai: GLM 4.6V","created":1765207462,"description":"GLM-4.6V is a large multimodal model designed for high-fidelity visual understanding and long-context reasoning across images, documents, and mixed media. It supports up to 128K tokens, processes complex page layouts...","context_length":131072,"architecture":{"modality":"text+image+video->text","input_modalities":["image","text","video"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":1.25685e-07,"completion":3.77055e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.995e-08,"input_cache_write":0.0,"max_prompt_cost":0.01647378432,"max_completion_cost":0.00904932,"max_cost":0.02250666432},"sats_pricing":{"prompt":0.00015607416083828163,"completion":0.00046822248251484483,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":2.4773676323536764e-05,"input_cache_write":0.0,"max_prompt_cost":20.45695240939525,"max_completion_cost":11.237339580356275,"max_cost":27.948512129632768},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":24000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"z-ai/glm-4.6-20251208","alias_ids":null,"forwarded_model_id":null},{"id":"deepseek-v3.1-nex-n1","name":"Nex AGI: DeepSeek V3.1 Nex N1","created":1765204393,"description":"DeepSeek V3.1 Nex-N1 is the flagship release of the Nex-N1 series — a post-trained model designed to highlight agent autonomy, tool use, and real-world productivity. Nex-N1 demonstrates competitive performance across...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":null},"pricing":{"prompt":5.6558249999999996e-08,"completion":2.0947500000000004e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0074132029439999995,"max_completion_cost":0.027456307200000005,"max_cost":0.027456307200000005},"sats_pricing":{"prompt":7.023337237722673e-05,"completion":0.0002601236013971361,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":9.205628584227862,"max_completion_cost":34.09492068232542,"max_cost":34.09492068232542},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":163840,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"nex-agi/deepseek-v3.1-nex-n1","alias_ids":null,"forwarded_model_id":null},{"id":"rnj-1-instruct","name":"EssentialAI: Rnj 1 Instruct","created":1765094847,"description":"Rnj-1 is an 8B-parameter, dense, open-weight model family developed by Essential AI and trained from scratch with a focus on programming, math, and scientific reasoning. The model demonstrates strong performance...","context_length":32768,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":6.28425e-08,"completion":6.28425e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.00205922304,"max_completion_cost":0.00205922304,"max_cost":0.00205922304},"sats_pricing":{"prompt":7.803708041914081e-05,"completion":7.803708041914081e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":2.557119051174406,"max_completion_cost":2.557119051174406,"max_cost":2.557119051174406},"per_request_limits":null,"top_provider":{"context_length":32768,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"essentialai/rnj-1-instruct","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-5.1-codex-max","name":"OpenAI: GPT-5.1-Codex-Max","created":1764878934,"description":"GPT-5.1-Codex-Max is OpenAI’s latest agentic coding model, designed for long-running, high-context software development tasks. It is based on an updated version of the 5.1 reasoning stack and trained on agentic...","context_length":400000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":5.236875e-07,"completion":4.1895e-06,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":4.9875e-08,"input_cache_write":0.0,"max_prompt_cost":0.20947500000000002,"max_completion_cost":0.5362560000000001,"max_cost":0.678699},"sats_pricing":{"prompt":0.0006503090034928401,"completion":0.005202472027942721,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":6.193419080884192e-05,"input_cache_write":0.0,"max_prompt_cost":260.12360139713604,"max_completion_cost":665.9164195766683,"max_cost":842.8004685267208},"per_request_limits":null,"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-5.1-codex-max-20251204","alias_ids":null,"forwarded_model_id":null},{"id":"nova-2-lite-v1","name":"Amazon: Nova 2 Lite","created":1764696672,"description":"Nova 2 Lite is a fast, cost-effective reasoning model for everyday workloads that can process text, images, and videos to generate text. Nova 2 Lite demonstrates standout capabilities in processing...","context_length":1000000,"architecture":{"modality":"text+image+file+video->text","input_modalities":["text","image","video","file"],"output_modalities":["text"],"tokenizer":"Nova","instruct_type":null},"pricing":{"prompt":1.25685e-07,"completion":1.047375e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.125685,"max_completion_cost":0.06863972062500001,"max_cost":0.18608795415},"sats_pricing":{"prompt":0.00015607416083828163,"completion":0.0013006180069856802,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":156.0741608382816,"max_completion_cost":85.23600108780656,"max_cost":231.08184179555138},"per_request_limits":null,"top_provider":{"context_length":1000000,"max_completion_tokens":65535,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"amazon/nova-2-lite-v1","alias_ids":null,"forwarded_model_id":null},{"id":"ministral-14b-2512","name":"Mistral: Ministral 3 14B 2512","created":1764681735,"description":"The largest model in the Ministral 3 family, Ministral 3 14B offers frontier capabilities and performance comparable to its larger Mistral Small 3.2 24B counterpart. A powerful and efficient language...","context_length":262144,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":8.379000000000001e-08,"completion":8.379000000000001e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":7.980000000000001e-09,"input_cache_write":0.0,"max_prompt_cost":0.021965045760000003,"max_completion_cost":0.021965045760000003,"max_cost":0.021965045760000003},"sats_pricing":{"prompt":0.00010404944055885442,"completion":0.00010404944055885442,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":9.909470529414706e-06,"input_cache_write":0.0,"max_prompt_cost":27.275936545860333,"max_completion_cost":27.275936545860333,"max_cost":27.275936545860333},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"mistralai/ministral-14b-2512","alias_ids":null,"forwarded_model_id":null},{"id":"ministral-8b-2512","name":"Mistral: Ministral 3 8B 2512","created":1764681654,"description":"A balanced model in the Ministral 3 family, Ministral 3 8B is a powerful, efficient tiny language model with vision capabilities.","context_length":262144,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":6.28425e-08,"completion":6.28425e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":5.9849999999999995e-09,"input_cache_write":0.0,"max_prompt_cost":0.01647378432,"max_completion_cost":0.01647378432,"max_cost":0.01647378432},"sats_pricing":{"prompt":7.803708041914081e-05,"completion":7.803708041914081e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":7.432102897061029e-06,"input_cache_write":0.0,"max_prompt_cost":20.45695240939525,"max_completion_cost":20.45695240939525,"max_cost":20.45695240939525},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"mistralai/ministral-8b-2512","alias_ids":null,"forwarded_model_id":null},{"id":"ministral-3b-2512","name":"Mistral: Ministral 3 3B 2512","created":1764681560,"description":"The smallest model in the Ministral 3 family, Ministral 3 3B is a powerful, efficient tiny language model with vision capabilities.","context_length":131072,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":4.1895000000000006e-08,"completion":4.1895000000000006e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":3.9900000000000005e-09,"input_cache_write":0.0,"max_prompt_cost":0.005491261440000001,"max_completion_cost":0.005491261440000001,"max_cost":0.005491261440000001},"sats_pricing":{"prompt":5.202472027942721e-05,"completion":5.202472027942721e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":4.954735264707353e-06,"input_cache_write":0.0,"max_prompt_cost":6.818984136465083,"max_completion_cost":6.818984136465083,"max_cost":6.818984136465083},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"mistralai/ministral-3b-2512","alias_ids":null,"forwarded_model_id":null},{"id":"mistral-large-2512","name":"Mistral: Mistral Large 3 2512","created":1764624472,"description":"Mistral Large 3 2512 is Mistral’s most capable model to date, featuring a sparse mixture-of-experts architecture with 41B active parameters (675B total), and released under the Apache 2.0 license.","context_length":262144,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":2.0947500000000004e-07,"completion":6.28425e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.995e-08,"input_cache_write":0.0,"max_prompt_cost":0.05491261440000001,"max_completion_cost":0.1647378432,"max_cost":0.1647378432},"sats_pricing":{"prompt":0.0002601236013971361,"completion":0.0007803708041914081,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":2.4773676323536764e-05,"input_cache_write":0.0,"max_prompt_cost":68.18984136465085,"max_completion_cost":204.56952409395248,"max_cost":204.56952409395248},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"mistralai/mistral-large-2512","alias_ids":null,"forwarded_model_id":null},{"id":"trinity-mini","name":"Arcee AI: Trinity Mini","created":1764601720,"description":"Trinity Mini is a 26B-parameter (3B active) sparse mixture-of-experts language model featuring 128 experts with 8 active per token. Engineered for efficient reasoning over long contexts (131k) with robust function...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":1.8852750000000003e-08,"completion":6.28425e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0024710676480000004,"max_completion_cost":0.00823689216,"max_cost":0.00823689216},"sats_pricing":{"prompt":2.3411124125742247e-05,"completion":7.803708041914081e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":3.068542861409288,"max_completion_cost":10.228476204697625,"max_cost":10.228476204697625},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":131072,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"arcee-ai/trinity-mini-20251201","alias_ids":null,"forwarded_model_id":null},{"id":"deepseek-v3.2-speciale","name":"DeepSeek: DeepSeek V3.2 Speciale","created":1764594837,"description":"DeepSeek-V3.2-Speciale is a high-compute variant of DeepSeek-V3.2 optimized for maximum reasoning and agentic performance. It builds on DeepSeek Sparse Attention (DSA) for efficient long-context processing, then scales post-training reinforcement learning...","context_length":163840,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":null},"pricing":{"prompt":1.6758000000000002e-07,"completion":5.0274e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":7.98e-08,"input_cache_write":0.0,"max_prompt_cost":0.027456307200000002,"max_completion_cost":0.0823689216,"max_cost":0.0823689216},"sats_pricing":{"prompt":0.00020809888111770884,"completion":0.0006242966433531265,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":9.909470529414706e-05,"input_cache_write":0.0,"max_prompt_cost":34.094920682325416,"max_completion_cost":102.28476204697624,"max_cost":102.28476204697624},"per_request_limits":null,"top_provider":{"context_length":163840,"max_completion_tokens":163840,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"deepseek/deepseek-v3.2-speciale-20251201","alias_ids":null,"forwarded_model_id":null},{"id":"deepseek-v3.2","name":"DeepSeek: DeepSeek V3.2","created":1764594642,"description":"DeepSeek-V3.2 is a large language model designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":null},"pricing":{"prompt":1.0557540000000001e-07,"completion":1.583631e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.00548e-08,"input_cache_write":0.0,"max_prompt_cost":0.013837978828800002,"max_completion_cost":0.0103784841216,"max_cost":0.017297473536000002},"sats_pricing":{"prompt":0.00013110229510415657,"completion":0.00019665344265623483,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.248593286706253e-05,"input_cache_write":0.0,"max_prompt_cost":17.18384002389201,"max_completion_cost":12.887880017919006,"max_cost":21.47980002986501},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":65536,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"deepseek/deepseek-v3.2-20251201","alias_ids":null,"forwarded_model_id":null},{"id":"intellect-3","name":"Prime Intellect: INTELLECT-3","created":1764212534,"description":"INTELLECT-3 is a 106B-parameter Mixture-of-Experts model (12B active) post-trained from GLM-4.5-Air-Base using supervised fine-tuning (SFT) followed by large-scale reinforcement learning (RL). It offers state-of-the-art performance for its size across math,...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":8.379000000000001e-08,"completion":4.60845e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.010982522880000001,"max_completion_cost":0.06040387584,"max_cost":0.06040387584},"sats_pricing":{"prompt":0.00010404944055885442,"completion":0.0005722719230736992,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":13.637968272930166,"max_completion_cost":75.00882550111591,"max_cost":75.00882550111591},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":131072,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"prime-intellect/intellect-3-20251126","alias_ids":null,"forwarded_model_id":null},{"id":"claude-opus-4.5","name":"Anthropic: Claude Opus 4.5","created":1764010580,"description":"Claude Opus 4.5 is Anthropic’s frontier reasoning model optimized for complex software engineering, agentic workflows, and long-horizon computer use. It offers strong multimodal capabilities, competitive performance across real-world coding and...","context_length":200000,"architecture":{"modality":"text+image+file->text","input_modalities":["file","image","text"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":2.09475e-06,"completion":1.0473750000000001e-05,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":1.995e-07,"input_cache_write":2.49375e-06,"max_prompt_cost":0.41895000000000004,"max_completion_cost":0.67032,"max_cost":0.955206},"sats_pricing":{"prompt":0.0026012360139713604,"completion":0.013006180069856802,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":0.0002477367632353677,"input_cache_write":0.0030967095404420956,"max_prompt_cost":520.2472027942721,"max_completion_cost":832.3955244708353,"max_cost":1186.1636223709402},"per_request_limits":null,"top_provider":{"context_length":200000,"max_completion_tokens":64000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"anthropic/claude-4.5-opus-20251124","alias_ids":null,"forwarded_model_id":null},{"id":"olmo-3-32b-think","name":"AllenAI: Olmo 3 32B Think","created":1763758276,"description":"Olmo 3 32B Think is a large-scale, 32-billion-parameter model purpose-built for deep reasoning, complex logic chains and advanced instruction-following scenarios. Its capacity enables strong performance on demanding evaluation tasks and...","context_length":65536,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":6.28425e-08,"completion":2.0947500000000004e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.00411844608,"max_completion_cost":0.013728153600000003,"max_cost":0.013728153600000003},"sats_pricing":{"prompt":7.803708041914081e-05,"completion":0.0002601236013971361,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":5.114238102348812,"max_completion_cost":17.04746034116271,"max_cost":17.04746034116271},"per_request_limits":null,"top_provider":{"context_length":65536,"max_completion_tokens":65536,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"allenai/olmo-3-32b-think-20251121","alias_ids":null,"forwarded_model_id":null},{"id":"gemini-3-pro-image-preview","name":"Google: Nano Banana Pro (Gemini 3 Pro Image Preview)","created":1763653797,"description":"Nano Banana Pro is Google’s most advanced image-generation and editing model, built on Gemini 3 Pro. It extends the original Nano Banana with significantly improved multimodal reasoning, real-world grounding, and...","context_length":65536,"architecture":{"modality":"text+image->text+image","input_modalities":["image","text"],"output_modalities":["image","text"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":5.586e-07,"completion":3.3516000000000007e-06,"request":0.0,"image":7.98e-07,"web_search":0.005586000000000001,"internal_reasoning":4.7880000000000006e-06,"input_cache_read":7.98e-08,"input_cache_write":1.4962500000000002e-07,"max_prompt_cost":0.0366084096,"max_completion_cost":0.10982522880000002,"max_cost":0.1281294336},"sats_pricing":{"prompt":0.0006936629370590295,"completion":0.004161977622354178,"request":0.001,"image":0.0009909470529414707,"web_search":6.936629370590294,"internal_reasoning":0.005945682317648824,"input_cache_read":9.909470529414706e-05,"input_cache_write":0.00018580257242652574,"max_prompt_cost":45.45989424310056,"max_completion_cost":136.3796827293017,"max_cost":159.10962985085195},"per_request_limits":null,"top_provider":{"context_length":65536,"max_completion_tokens":32768,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"google/gemini-3-pro-image-preview-20251120","alias_ids":null,"forwarded_model_id":null},{"id":"grok-4.1-fast","name":"xAI: Grok 4.1 Fast","created":1763587502,"description":"Grok 4.1 Fast is xAI's best agentic tool calling model that shines in real-world use cases like customer support and deep research. 2M context window. Reasoning can be enabled/disabled using...","context_length":2000000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Grok","instruct_type":null},"pricing":{"prompt":8.379000000000001e-08,"completion":2.0947500000000004e-07,"request":0.0,"image":0.0,"web_search":0.0019950000000000002,"internal_reasoning":0.0,"input_cache_read":1.995e-08,"input_cache_write":0.0,"max_prompt_cost":0.16758000000000003,"max_completion_cost":0.006284250000000002,"max_cost":0.17135055000000002},"sats_pricing":{"prompt":0.00010404944055885442,"completion":0.0002601236013971361,"request":0.001,"image":0.0,"web_search":2.4773676323536766,"internal_reasoning":0.0,"input_cache_read":2.4773676323536764e-05,"input_cache_write":0.0,"max_prompt_cost":208.09888111770886,"max_completion_cost":7.803708041914083,"max_cost":212.7811059428573},"per_request_limits":null,"top_provider":{"context_length":2000000,"max_completion_tokens":30000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"x-ai/grok-4.1-fast","alias_ids":null,"forwarded_model_id":null},{"id":"cogito-v2.1-671b","name":"Deep Cogito: Cogito v2.1 671B","created":1763071233,"description":"Cogito v2.1 671B MoE represents one of the strongest open models globally, matching performance of frontier closed and open models. This model is trained using self play with reinforcement learning...","context_length":128000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":5.236875e-07,"completion":5.236875e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.06703200000000001,"max_completion_cost":0.06703200000000001,"max_cost":0.06703200000000001},"sats_pricing":{"prompt":0.0006503090034928401,"completion":0.0006503090034928401,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":83.23955244708354,"max_completion_cost":83.23955244708354,"max_cost":83.23955244708354},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"deepcogito/cogito-v2.1-671b-20251118","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-5.1","name":"OpenAI: GPT-5.1","created":1763060305,"description":"GPT-5.1 is the latest frontier-grade model in the GPT-5 series, offering stronger general-purpose reasoning, improved instruction adherence, and a more natural conversational style compared to GPT-5. It uses adaptive reasoning...","context_length":400000,"architecture":{"modality":"text+image+file->text","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":5.236875e-07,"completion":4.1895e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":5.187e-08,"input_cache_write":0.0,"max_prompt_cost":0.20947500000000002,"max_completion_cost":0.5362560000000001,"max_cost":0.678699},"sats_pricing":{"prompt":0.0006503090034928401,"completion":0.005202472027942721,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":6.441155844119559e-05,"input_cache_write":0.0,"max_prompt_cost":260.12360139713604,"max_completion_cost":665.9164195766683,"max_cost":842.8004685267208},"per_request_limits":null,"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-5.1-20251113","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-5.1-chat","name":"OpenAI: GPT-5.1 Chat","created":1763060302,"description":"GPT-5.1 Chat (AKA Instant is the fast, lightweight member of the 5.1 family, optimized for low-latency chat while retaining strong general intelligence. It uses adaptive reasoning to selectively “think” on...","context_length":128000,"architecture":{"modality":"text+image+file->text","input_modalities":["file","image","text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":5.236875e-07,"completion":4.1895e-06,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":4.9875e-08,"input_cache_write":0.0,"max_prompt_cost":0.06703200000000001,"max_completion_cost":0.068640768,"max_cost":0.12709267200000002},"sats_pricing":{"prompt":0.0006503090034928401,"completion":0.005202472027942721,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":6.193419080884192e-05,"input_cache_write":0.0,"max_prompt_cost":83.23955244708354,"max_completion_cost":85.23730170581354,"max_cost":157.82219143967038},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-5.1-chat-20251113","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-5.1-codex","name":"OpenAI: GPT-5.1-Codex","created":1763060298,"description":"GPT-5.1-Codex is a specialized version of GPT-5.1 optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks....","context_length":400000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":5.236875e-07,"completion":4.1895e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":4.9875e-08,"input_cache_write":0.0,"max_prompt_cost":0.20947500000000002,"max_completion_cost":0.5362560000000001,"max_cost":0.678699},"sats_pricing":{"prompt":0.0006503090034928401,"completion":0.005202472027942721,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":6.193419080884192e-05,"input_cache_write":0.0,"max_prompt_cost":260.12360139713604,"max_completion_cost":665.9164195766683,"max_cost":842.8004685267208},"per_request_limits":null,"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-5.1-codex-20251113","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-5.1-codex-mini","name":"OpenAI: GPT-5.1-Codex-Mini","created":1763057820,"description":"GPT-5.1-Codex-Mini is a smaller and faster version of GPT-5.1-Codex","context_length":400000,"architecture":{"modality":"text+image->text","input_modalities":["image","text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":1.0473750000000002e-07,"completion":8.379000000000002e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.1969999999999999e-08,"input_cache_write":0.0,"max_prompt_cost":0.04189500000000001,"max_completion_cost":0.10725120000000002,"max_cost":0.13573980000000002},"sats_pricing":{"prompt":0.00013006180069856805,"completion":0.0010404944055885444,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.4864205794122058e-05,"input_cache_write":0.0,"max_prompt_cost":52.024720279427214,"max_completion_cost":133.18328391533367,"max_cost":168.56009370534417},"per_request_limits":null,"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-5.1-codex-mini-20251113","alias_ids":null,"forwarded_model_id":null},{"id":"kimi-k2-thinking","name":"MoonshotAI: Kimi K2 Thinking","created":1762440622,"description":"Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":2.5137e-07,"completion":1.047375e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":5.985e-08,"input_cache_write":0.0,"max_prompt_cost":0.06589513728,"max_completion_cost":0.274563072,"max_cost":0.274563072},"sats_pricing":{"prompt":0.00031214832167656326,"completion":0.0013006180069856802,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":7.432102897061029e-05,"input_cache_write":0.0,"max_prompt_cost":81.827809637581,"max_completion_cost":340.94920682325414,"max_cost":340.94920682325414},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":262144,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"moonshotai/kimi-k2-thinking-20251106","alias_ids":null,"forwarded_model_id":null},{"id":"nova-premier-v1","name":"Amazon: Nova Premier 1.0","created":1761950332,"description":"Amazon Nova Premier is the most capable of Amazon’s multimodal models for complex reasoning tasks and for use as the best teacher for distilling custom models.","context_length":1000000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Nova","instruct_type":null},"pricing":{"prompt":1.047375e-06,"completion":5.2368750000000006e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":2.4937500000000005e-07,"input_cache_write":0.0,"max_prompt_cost":1.0473750000000002,"max_completion_cost":0.16758,"max_cost":1.1814390000000001},"sats_pricing":{"prompt":0.0013006180069856802,"completion":0.006503090034928401,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.00030967095404420964,"input_cache_write":0.0,"max_prompt_cost":1300.6180069856803,"max_completion_cost":208.09888111770883,"max_cost":1467.0971118798473},"per_request_limits":null,"top_provider":{"context_length":1000000,"max_completion_tokens":32000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"amazon/nova-premier-v1","alias_ids":null,"forwarded_model_id":null},{"id":"sonar-pro-search","name":"Perplexity: Sonar Pro Search","created":1761854366,"description":"Exclusively available on the OpenRouter API, Sonar Pro's new Pro Search mode is Perplexity's most advanced agentic search system. It is designed for deeper reasoning and analysis. Pricing is based...","context_length":200000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":1.25685e-06,"completion":6.28425e-06,"request":0.0,"image":0.0,"web_search":0.007182,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.25137000000000004,"max_completion_cost":0.050274,"max_cost":0.2915892},"sats_pricing":{"prompt":0.0015607416083828162,"completion":0.007803708041914081,"request":0.001,"image":0.0,"web_search":8.918523476473235,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":312.1483216765633,"max_completion_cost":62.429664335312644,"max_cost":362.09205314481335},"per_request_limits":null,"top_provider":{"context_length":200000,"max_completion_tokens":8000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"perplexity/sonar-pro-search","alias_ids":null,"forwarded_model_id":null},{"id":"voxtral-small-24b-2507","name":"Mistral: Voxtral Small 24B 2507","created":1761835144,"description":"Voxtral Small is an enhancement of Mistral Small 3, incorporating state-of-the-art audio input capabilities while retaining best-in-class text performance. It excels at speech transcription, translation and audio understanding. Input audio...","context_length":32000,"architecture":{"modality":"text+audio->text","input_modalities":["text","audio"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":4.1895000000000006e-08,"completion":1.25685e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":3.9900000000000005e-09,"input_cache_write":0.0,"max_prompt_cost":0.0013406400000000001,"max_completion_cost":0.00402192,"max_cost":0.00402192},"sats_pricing":{"prompt":5.202472027942721e-05,"completion":0.00015607416083828163,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":4.954735264707353e-06,"input_cache_write":0.0,"max_prompt_cost":1.6647910489416706,"max_completion_cost":4.994373146825012,"max_cost":4.994373146825012},"per_request_limits":null,"top_provider":{"context_length":32000,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"mistralai/voxtral-small-24b-2507","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-oss-safeguard-20b","name":"OpenAI: gpt-oss-safeguard-20b","created":1761752836,"description":"gpt-oss-safeguard-20b is a safety reasoning model from OpenAI built upon gpt-oss-20b. This open-weight, 21B-parameter Mixture-of-Experts (MoE) model offers lower latency for safety tasks like content classification, LLM filtering, and trust...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":3.142125e-08,"completion":1.25685e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.4763000000000001e-08,"input_cache_write":0.0,"max_prompt_cost":0.00411844608,"max_completion_cost":0.00823689216,"max_cost":0.0102961152},"sats_pricing":{"prompt":3.901854020957041e-05,"completion":0.00015607416083828163,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.8332520479417208e-05,"input_cache_write":0.0,"max_prompt_cost":5.114238102348812,"max_completion_cost":10.228476204697625,"max_cost":12.78559525587203},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":65536,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-oss-safeguard-20b","alias_ids":null,"forwarded_model_id":null},{"id":"nemotron-nano-12b-v2-vl","name":"NVIDIA: Nemotron Nano 12B 2 VL","created":1761675565,"description":"NVIDIA Nemotron Nano 2 VL is a 12-billion-parameter open multimodal reasoning model designed for video understanding and document intelligence. It introduces a hybrid Transformer-Mamba architecture, combining transformer-level accuracy with Mamba’s...","context_length":131072,"architecture":{"modality":"text+image+video->text","input_modalities":["image","text","video"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":8.379000000000001e-08,"completion":2.5137e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.010982522880000001,"max_completion_cost":0.00411844608,"max_cost":0.013728153600000001},"sats_pricing":{"prompt":0.00010404944055885442,"completion":0.00031214832167656326,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":13.637968272930166,"max_completion_cost":5.114238102348812,"max_cost":17.047460341162708},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"nvidia/nemotron-nano-12b-v2-vl","alias_ids":null,"forwarded_model_id":null},{"id":"minimax-m2","name":"MiniMax: MiniMax M2","created":1761252093,"description":"MiniMax-M2 is a compact, high-efficiency large language model optimized for end-to-end coding and agentic workflows. With 10 billion activated parameters (230 billion total), it delivers near-frontier intelligence across general reasoning,...","context_length":196608,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":1.0683225000000001e-07,"completion":4.189500000000001e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.1969999999999999e-08,"input_cache_write":0.0,"max_prompt_cost":0.021004075008,"max_completion_cost":0.08236892160000002,"max_cost":0.08236892160000002},"sats_pricing":{"prompt":0.00013266303671253938,"completion":0.0005202472027942722,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.4864205794122058e-05,"input_cache_write":0.0,"max_prompt_cost":26.08261432197894,"max_completion_cost":102.28476204697627,"max_cost":102.28476204697627},"per_request_limits":null,"top_provider":{"context_length":196608,"max_completion_tokens":196608,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"minimax/minimax-m2","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3-vl-32b-instruct","name":"Qwen: Qwen3 VL 32B Instruct","created":1761231332,"description":"Qwen3-VL-32B-Instruct is a large-scale multimodal vision-language model designed for high-precision understanding and reasoning across text, images, and video. With 32 billion parameters, it combines deep visual perception with advanced text...","context_length":131072,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":null},"pricing":{"prompt":4.357080000000001e-08,"completion":1.7428320000000004e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.005710911897600001,"max_completion_cost":0.005710911897600001,"max_cost":0.009994095820800002},"sats_pricing":{"prompt":5.4105709090604306e-05,"completion":0.00021642283636241723,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":7.091743501923688,"max_completion_cost":7.091743501923688,"max_cost":12.410551128366453},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":32768,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3-vl-32b-instruct","alias_ids":null,"forwarded_model_id":null},{"id":"granite-4.0-h-micro","name":"IBM: Granite 4.0 Micro","created":1760927695,"description":"Granite-4.0-H-Micro is a 3B parameter from the Granite 4 family of models. These models are the latest in a series of models released by IBM. They are fine-tuned for long...","context_length":131000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":7.122150000000001e-09,"completion":4.6084500000000005e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.00093300165,"max_completion_cost":0.0060370695000000005,"max_cost":0.0060370695000000005},"sats_pricing":{"prompt":8.844202447502625e-06,"completion":5.722719230736993e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":1.158590520622844,"max_completion_cost":7.496762192265461,"max_cost":7.496762192265461},"per_request_limits":null,"top_provider":{"context_length":131000,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"ibm-granite/granite-4.0-h-micro","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-5-image-mini","name":"OpenAI: GPT-5 Image Mini","created":1760624583,"description":"GPT-5 Image Mini combines OpenAI's advanced language capabilities, powered by [GPT-5 Mini](https://openrouter.ai/openai/gpt-5-mini), with GPT Image 1 Mini for efficient image generation. This natively multimodal model features superior instruction following, text...","context_length":400000,"architecture":{"modality":"text+image+file->text+image","input_modalities":["file","image","text"],"output_modalities":["image","text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":1.047375e-06,"completion":8.379000000000002e-07,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":9.975e-08,"input_cache_write":0.0,"max_prompt_cost":0.41895000000000004,"max_completion_cost":0.10725120000000002,"max_cost":0.3921372000000001},"sats_pricing":{"prompt":0.0013006180069856802,"completion":0.0010404944055885444,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":0.00012386838161768384,"input_cache_write":0.0,"max_prompt_cost":520.2472027942721,"max_completion_cost":133.18328391533367,"max_cost":486.9513818154387},"per_request_limits":null,"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-5-image-mini","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3-vl-8b-thinking","name":"Qwen: Qwen3 VL 8B Thinking","created":1760463746,"description":"Qwen3-VL-8B-Thinking is the reasoning-optimized variant of the Qwen3-VL-8B multimodal model, designed for advanced visual and textual reasoning across complex scenes, documents, and temporal sequences. It integrates enhanced multimodal alignment and...","context_length":131072,"architecture":{"modality":"text+image->text","input_modalities":["image","text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":4.9017149999999996e-08,"completion":5.718667500000001e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0064247758847999995,"max_completion_cost":0.018738929664000002,"max_cost":0.023557511577600002},"sats_pricing":{"prompt":6.0868922726929824e-05,"completion":0.0007101374318141814,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":7.978211439664146,"max_completion_cost":23.269783365687097,"max_cost":29.253441945435206},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":32768,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3-vl-8b-thinking","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3-vl-8b-instruct","name":"Qwen: Qwen3 VL 8B Instruct","created":1760463308,"description":"Qwen3-VL-8B-Instruct is a multimodal vision-language model from the Qwen3-VL series, built for high-fidelity understanding and reasoning across text, images, and video. It features improved multimodal fusion with Interleaved-MRoPE for long-horizon...","context_length":131072,"architecture":{"modality":"text+image->text","input_modalities":["image","text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":3.351600000000001e-08,"completion":2.0947500000000004e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.004393009152000001,"max_completion_cost":0.006864076800000001,"max_cost":0.010158833664000002},"sats_pricing":{"prompt":4.161977622354177e-05,"completion":0.0002601236013971361,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":5.455187309172067,"max_completion_cost":8.523730170581356,"max_cost":12.615120652460405},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":32768,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3-vl-8b-instruct","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-5-image","name":"OpenAI: GPT-5 Image","created":1760447986,"description":"[GPT-5](https://openrouter.ai/openai/gpt-5) Image combines OpenAI's GPT-5 model with state-of-the-art image generation capabilities. It offers major improvements in reasoning, code quality, and user experience while incorporating GPT Image 1's superior instruction following,...","context_length":400000,"architecture":{"modality":"text+image+file->text+image","input_modalities":["image","text","file"],"output_modalities":["image","text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":4.1895e-06,"completion":4.1895e-06,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":4.987500000000001e-07,"input_cache_write":0.0,"max_prompt_cost":1.6758000000000002,"max_completion_cost":0.5362560000000001,"max_cost":1.6758000000000002},"sats_pricing":{"prompt":0.005202472027942721,"completion":0.005202472027942721,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":0.0006193419080884193,"input_cache_write":0.0,"max_prompt_cost":2080.9888111770883,"max_completion_cost":665.9164195766683,"max_cost":2080.9888111770883},"per_request_limits":null,"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-5-image","alias_ids":null,"forwarded_model_id":null},{"id":"o3-deep-research","name":"OpenAI: o3 Deep Research","created":1760129661,"description":"o3-deep-research is OpenAI's advanced model for deep research, designed to tackle complex, multi-step research tasks.\n\nNote: This model always uses the 'web_search' tool which adds additional cost.","context_length":200000,"architecture":{"modality":"text+image+file->text","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":4.1895e-06,"completion":1.6758e-05,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":9.975000000000002e-07,"input_cache_write":0.0,"max_prompt_cost":0.8379000000000001,"max_completion_cost":1.6758000000000002,"max_cost":2.0947500000000003},"sats_pricing":{"prompt":0.005202472027942721,"completion":0.020809888111770883,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":0.0012386838161768385,"input_cache_write":0.0,"max_prompt_cost":1040.4944055885442,"max_completion_cost":2080.9888111770883,"max_cost":2601.2360139713605},"per_request_limits":null,"top_provider":{"context_length":200000,"max_completion_tokens":100000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/o3-deep-research-2025-06-26","alias_ids":null,"forwarded_model_id":null},{"id":"o4-mini-deep-research","name":"OpenAI: o4 Mini Deep Research","created":1760129642,"description":"o4-mini-deep-research is OpenAI's faster, more affordable deep research model—ideal for tackling complex, multi-step research tasks.\n\nNote: This model always uses the 'web_search' tool which adds additional cost.","context_length":200000,"architecture":{"modality":"text+image+file->text","input_modalities":["file","image","text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":8.379000000000002e-07,"completion":3.3516000000000007e-06,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":1.995e-07,"input_cache_write":0.0,"max_prompt_cost":0.16758000000000003,"max_completion_cost":0.33516000000000007,"max_cost":0.4189500000000001},"sats_pricing":{"prompt":0.0010404944055885444,"completion":0.004161977622354178,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":0.0002477367632353677,"input_cache_write":0.0,"max_prompt_cost":208.09888111770886,"max_completion_cost":416.1977622354177,"max_cost":520.2472027942722},"per_request_limits":null,"top_provider":{"context_length":200000,"max_completion_tokens":100000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/o4-mini-deep-research-2025-06-26","alias_ids":null,"forwarded_model_id":null},{"id":"llama-3.3-nemotron-super-49b-v1.5","name":"NVIDIA: Llama 3.3 Nemotron Super 49B V1.5","created":1760101395,"description":"Llama-3.3-Nemotron-Super-49B-v1.5 is a 49B-parameter, English-centric reasoning/chat model derived from Meta’s Llama-3.3-70B-Instruct with a 128K context. It’s post-trained for agentic workflows (RAG, tool calling) via SFT across math, code, science, and...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":null},"pricing":{"prompt":4.1895000000000006e-08,"completion":1.6758000000000002e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.005491261440000001,"max_completion_cost":0.0027456307200000004,"max_cost":0.007550484480000001},"sats_pricing":{"prompt":5.202472027942721e-05,"completion":0.00020809888111770884,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":6.818984136465083,"max_completion_cost":3.4094920682325416,"max_cost":9.37610318763949},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"nvidia/llama-3.3-nemotron-super-49b-v1.5","alias_ids":null,"forwarded_model_id":null},{"id":"ernie-4.5-21b-a3b-thinking","name":"Baidu: ERNIE 4.5 21B A3B Thinking","created":1760048887,"description":"ERNIE-4.5-21B-A3B-Thinking is Baidu's upgraded lightweight MoE model, refined to boost reasoning depth and quality for top-tier performance in logical puzzles, math, science, coding, text generation, and expert-level academic benchmarks.","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":2.9326499999999998e-08,"completion":1.1730599999999999e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0038438830079999997,"max_completion_cost":0.0076877660159999995,"max_cost":0.00960970752},"sats_pricing":{"prompt":3.641730419559904e-05,"completion":0.00014566921678239617,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":4.773288895525558,"max_completion_cost":9.546577791051115,"max_cost":11.933222238813894},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":65536,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"baidu/ernie-4.5-21b-a3b-thinking","alias_ids":null,"forwarded_model_id":null},{"id":"gemini-2.5-flash-image","name":"Google: Nano Banana (Gemini 2.5 Flash Image)","created":1759870431,"description":"Gemini 2.5 Flash Image, a.k.a. \"Nano Banana,\" is now generally available. It is a state of the art image generation model with contextual understanding. It is capable of image generation,...","context_length":32768,"architecture":{"modality":"text+image->text+image","input_modalities":["image","text"],"output_modalities":["image","text"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":8.379000000000001e-08,"completion":6.9825e-07,"request":0.0,"image":1.197e-07,"web_search":0.005586000000000001,"internal_reasoning":9.975000000000002e-07,"input_cache_read":1.1969999999999999e-08,"input_cache_write":3.325e-08,"max_prompt_cost":0.0027456307200000004,"max_completion_cost":0.022880256,"max_cost":0.022880256},"sats_pricing":{"prompt":0.00010404944055885442,"completion":0.0008670786713237868,"request":0.001,"image":0.00014864205794122058,"web_search":6.936629370590294,"internal_reasoning":0.0012386838161768385,"input_cache_read":1.4864205794122058e-05,"input_cache_write":4.1289460539227946e-05,"max_prompt_cost":3.4094920682325416,"max_completion_cost":28.412433901937845,"max_cost":28.412433901937845},"per_request_limits":null,"top_provider":{"context_length":32768,"max_completion_tokens":32768,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"google/gemini-2.5-flash-image","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3-vl-30b-a3b-thinking","name":"Qwen: Qwen3 VL 30B A3B Thinking","created":1759794479,"description":"Qwen3-VL-30B-A3B-Thinking is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Thinking variant enhances reasoning in STEM, math, and complex tasks. It excels...","context_length":131072,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":5.446350000000001e-08,"completion":6.53562e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.007138639872000001,"max_completion_cost":0.021415919616,"max_cost":0.02676989952},"sats_pricing":{"prompt":6.763213636325538e-05,"completion":0.0008115856363590644,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":8.86467937740461,"max_completion_cost":26.59403813221382,"max_cost":33.24254766526728},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":32768,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3-vl-30b-a3b-thinking","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3-vl-30b-a3b-instruct","name":"Qwen: Qwen3 VL 30B A3B Instruct","created":1759794476,"description":"Qwen3-VL-30B-A3B-Instruct is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Instruct variant optimizes instruction-following for general multimodal tasks. It excels in perception...","context_length":131072,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":5.446350000000001e-08,"completion":2.1785400000000004e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.007138639872000001,"max_completion_cost":0.007138639872000001,"max_cost":0.012492619776000002},"sats_pricing":{"prompt":6.763213636325538e-05,"completion":0.00027052854545302153,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":8.86467937740461,"max_completion_cost":8.86467937740461,"max_cost":15.513188910458066},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":32768,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3-vl-30b-a3b-instruct","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-5-pro","name":"OpenAI: GPT-5 Pro","created":1759776663,"description":"GPT-5 Pro is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and...","context_length":400000,"architecture":{"modality":"text+image+file->text","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":6.28425e-06,"completion":5.0274e-05,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":2.5137,"max_completion_cost":6.435072,"max_cost":8.144388},"sats_pricing":{"prompt":0.007803708041914081,"completion":0.06242966433531265,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":3121.4832167656323,"max_completion_cost":7990.997034920018,"max_cost":10113.605622320647},"per_request_limits":null,"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-5-pro-2025-10-06","alias_ids":null,"forwarded_model_id":null},{"id":"glm-4.6","name":"Z.ai: GLM 4.6","created":1759235576,"description":"Compared with GLM-4.5, this generation brings several key improvements: Longer context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex...","context_length":204800,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":1.633905e-07,"completion":7.96005e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0334623744,"max_completion_cost":0.163021824,"max_cost":0.163021824},"sats_pricing":{"prompt":0.0002028964090897661,"completion":0.0009884696853091168,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":41.5531845815841,"max_completion_cost":202.43859155130716,"max_cost":202.43859155130716},"per_request_limits":null,"top_provider":{"context_length":204800,"max_completion_tokens":204800,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"z-ai/glm-4.6","alias_ids":null,"forwarded_model_id":null},{"id":"claude-sonnet-4.5","name":"Anthropic: Claude Sonnet 4.5","created":1759161676,"description":"Claude Sonnet 4.5 is Anthropic’s most advanced Sonnet model to date, optimized for real-world agents and coding workflows. It delivers state-of-the-art performance on coding benchmarks such as SWE-bench Verified, with...","context_length":1000000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":1.25685e-06,"completion":6.28425e-06,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":1.197e-07,"input_cache_write":1.49625e-06,"max_prompt_cost":1.25685,"max_completion_cost":0.402192,"max_cost":1.5786036},"sats_pricing":{"prompt":0.0015607416083828162,"completion":0.007803708041914081,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":0.00014864205794122058,"input_cache_write":0.0018580257242652575,"max_prompt_cost":1560.7416083828161,"max_completion_cost":499.43731468250115,"max_cost":1960.291460128817},"per_request_limits":null,"top_provider":{"context_length":1000000,"max_completion_tokens":64000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"anthropic/claude-4.5-sonnet-20250929","alias_ids":null,"forwarded_model_id":null},{"id":"deepseek-v3.2-exp","name":"DeepSeek: DeepSeek V3.2 Exp","created":1759150481,"description":"DeepSeek-V3.2-Exp is an experimental large language model released by DeepSeek as an intermediate step between V3.1 and future architectures. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism...","context_length":163840,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":"deepseek-v3.1"},"pricing":{"prompt":1.1311649999999999e-07,"completion":1.717695e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.01853300736,"max_completion_cost":0.011257085952,"max_cost":0.022376890368},"sats_pricing":{"prompt":0.00014046674475445345,"completion":0.00021330135314565153,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":23.014071460569653,"max_completion_cost":13.978917479753418,"max_cost":27.78736035609521},"per_request_limits":null,"top_provider":{"context_length":163840,"max_completion_tokens":65536,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"deepseek/deepseek-v3.2-exp","alias_ids":null,"forwarded_model_id":null},{"id":"cydonia-24b-v4.1","name":"TheDrummer: Cydonia 24B V4.1","created":1758931878,"description":"Uncensored and creative writing model based on Mistral Small 3.2 24B with good recall, prompt adherence, and intelligence.","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":1.25685e-07,"completion":2.0947500000000004e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":5.985e-08,"input_cache_write":0.0,"max_prompt_cost":0.01647378432,"max_completion_cost":0.027456307200000005,"max_cost":0.027456307200000005},"sats_pricing":{"prompt":0.00015607416083828163,"completion":0.0002601236013971361,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":7.432102897061029e-05,"input_cache_write":0.0,"max_prompt_cost":20.45695240939525,"max_completion_cost":34.09492068232542,"max_cost":34.09492068232542},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":131072,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"thedrummer/cydonia-24b-v4.1","alias_ids":null,"forwarded_model_id":null},{"id":"relace-apply-3","name":"Relace: Relace Apply 3","created":1758891572,"description":"Relace Apply 3 is a specialized code-patching LLM that merges AI-suggested edits straight into your source files. It can apply updates from GPT-4o, Claude, and others into your files at...","context_length":256000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":3.561075e-07,"completion":5.236875e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.09116352,"max_completion_cost":0.06703200000000001,"max_cost":0.11261376000000001},"sats_pricing":{"prompt":0.00044221012237513125,"completion":0.0006503090034928401,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":113.2057913280336,"max_completion_cost":83.23955244708354,"max_cost":139.84244811110034},"per_request_limits":null,"top_provider":{"context_length":256000,"max_completion_tokens":128000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"relace/relace-apply-3","alias_ids":null,"forwarded_model_id":null},{"id":"gemini-2.5-flash-lite-preview-09-2025","name":"Google: Gemini 2.5 Flash Lite Preview 09-2025","created":1758819686,"description":"Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. It offers improved throughput, faster token generation, and better performance...","context_length":1048576,"architecture":{"modality":"text+image+file+audio+video->text","input_modalities":["text","image","file","audio","video"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":2.7930000000000003e-08,"completion":1.1172000000000001e-07,"request":0.0,"image":3.99e-08,"web_search":0.005586000000000001,"internal_reasoning":1.596e-07,"input_cache_read":3.9900000000000005e-09,"input_cache_write":3.325e-08,"max_prompt_cost":0.029286727680000003,"max_completion_cost":0.0073215702,"max_cost":0.034777905330000004},"sats_pricing":{"prompt":3.468314685295147e-05,"completion":0.00013873258741180588,"request":0.001,"image":4.954735264707353e-05,"web_search":6.936629370590294,"internal_reasoning":0.00019818941058829411,"input_cache_read":4.954735264707353e-06,"input_cache_write":4.1289460539227946e-05,"max_prompt_cost":36.36791539448044,"max_completion_cost":9.0918401160327,"max_cost":43.18679548150497},"per_request_limits":null,"top_provider":{"context_length":1048576,"max_completion_tokens":65535,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"google/gemini-2.5-flash-lite-preview-09-2025","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3-vl-235b-a22b-thinking","name":"Qwen: Qwen3 VL 235B A22B Thinking","created":1758668690,"description":"Qwen3-VL-235B-A22B Thinking is a multimodal model that unifies strong text generation with visual understanding across images and video. The Thinking model is optimized for multimodal reasoning in STEM and math....","context_length":131072,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":1.0892700000000002e-07,"completion":1.0892700000000001e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.014277279744000003,"max_completion_cost":0.035693199360000004,"max_cost":0.04640115916800001},"sats_pricing":{"prompt":0.00013526427272651077,"completion":0.0013526427272651075,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":17.72935875480922,"max_completion_cost":44.323396887023044,"max_cost":57.62041595312996},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":32768,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3-vl-235b-a22b-thinking","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3-vl-235b-a22b-instruct","name":"Qwen: Qwen3 VL 235B A22B Instruct","created":1758668687,"description":"Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video. The Instruct model targets general vision-language use (VQA, document parsing, chart/table...","context_length":262144,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":8.379000000000001e-08,"completion":3.6867600000000004e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":4.3890000000000005e-08,"input_cache_write":0.0,"max_prompt_cost":0.021965045760000003,"max_completion_cost":0.006040387584000001,"max_cost":0.026632617984000004},"sats_pricing":{"prompt":0.00010404944055885442,"completion":0.00045781753845895945,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":5.450208791178089e-05,"input_cache_write":0.0,"max_prompt_cost":27.275936545860333,"max_completion_cost":7.500882550111592,"max_cost":33.072073061855654},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3-vl-235b-a22b-instruct","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3-max","name":"Qwen: Qwen3 Max","created":1758662808,"description":"Qwen3-Max is an updated release built on the Qwen3 series, offering major improvements in reasoning, instruction following, multilingual support, and long-tail knowledge coverage compared to the January 2025 version. It...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":3.26781e-07,"completion":1.633905e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":6.2244e-08,"input_cache_write":3.89025e-07,"max_prompt_cost":0.085663678464,"max_completion_cost":0.05353979904,"max_cost":0.12849551769599998},"sats_pricing":{"prompt":0.0004057928181795322,"completion":0.002028964090897661,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":7.72938701294347e-05,"input_cache_write":0.00048308668830896693,"max_prompt_cost":106.37615252885529,"max_completion_cost":66.48509533053456,"max_cost":159.56422879328292},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":32768,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3-max","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3-coder-plus","name":"Qwen: Qwen3 Coder Plus","created":1758662707,"description":"Qwen3 Coder Plus is Alibaba's proprietary version of the Open Source Qwen3 Coder 480B A35B. It is a powerful coding agent model specializing in autonomous programming via tool calling and...","context_length":1000000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":2.7231750000000003e-07,"completion":1.3615875e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":5.187e-08,"input_cache_write":3.241875e-07,"max_prompt_cost":0.27231750000000005,"max_completion_cost":0.0892329984,"max_cost":0.34370389872},"sats_pricing":{"prompt":0.0003381606818162769,"completion":0.001690803409081384,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":6.441155844119559e-05,"input_cache_write":0.0004025722402574724,"max_prompt_cost":338.1606818162769,"max_completion_cost":110.80849221755759,"max_cost":426.8074755903229},"per_request_limits":null,"top_provider":{"context_length":1000000,"max_completion_tokens":65536,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3-coder-plus","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-5-codex","name":"OpenAI: GPT-5 Codex","created":1758643403,"description":"GPT-5-Codex is a specialized version of GPT-5 optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks....","context_length":400000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":5.236875e-07,"completion":4.1895e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":4.9875e-08,"input_cache_write":0.0,"max_prompt_cost":0.20947500000000002,"max_completion_cost":0.5362560000000001,"max_cost":0.678699},"sats_pricing":{"prompt":0.0006503090034928401,"completion":0.005202472027942721,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":6.193419080884192e-05,"input_cache_write":0.0,"max_prompt_cost":260.12360139713604,"max_completion_cost":665.9164195766683,"max_cost":842.8004685267208},"per_request_limits":null,"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-5-codex","alias_ids":null,"forwarded_model_id":null},{"id":"deepseek-v3.1-terminus","name":"DeepSeek: DeepSeek V3.1 Terminus","created":1758548275,"description":"DeepSeek-V3.1 Terminus is an update to [DeepSeek V3.1](/deepseek/deepseek-chat-v3.1) that maintains the model's original capabilities while addressing issues reported by users, including language consistency and agent capabilities, further optimizing the model's...","context_length":163840,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":"deepseek-v3.1"},"pricing":{"prompt":1.1311649999999999e-07,"completion":3.980025e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":5.187e-08,"input_cache_write":0.0,"max_prompt_cost":0.01853300736,"max_completion_cost":0.01304174592,"max_cost":0.027868151808},"sats_pricing":{"prompt":0.00014046674475445345,"completion":0.0004942348426545584,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":6.441155844119559e-05,"input_cache_write":0.0,"max_prompt_cost":23.014071460569653,"max_completion_cost":16.19508732410457,"max_cost":34.6063444925603},"per_request_limits":null,"top_provider":{"context_length":163840,"max_completion_tokens":32768,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"deepseek/deepseek-v3.1-terminus","alias_ids":null,"forwarded_model_id":null},{"id":"grok-4-fast","name":"xAI: Grok 4 Fast","created":1758240090,"description":"Grok 4 Fast is xAI's latest multimodal model with SOTA cost-efficiency and a 2M token context window. It comes in two flavors: non-reasoning and reasoning. Read more about the model...","context_length":2000000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Grok","instruct_type":null},"pricing":{"prompt":8.379000000000001e-08,"completion":2.0947500000000004e-07,"request":0.0,"image":0.0,"web_search":0.0019950000000000002,"internal_reasoning":0.0,"input_cache_read":1.995e-08,"input_cache_write":0.0,"max_prompt_cost":0.16758000000000003,"max_completion_cost":0.006284250000000002,"max_cost":0.17135055000000002},"sats_pricing":{"prompt":0.00010404944055885442,"completion":0.0002601236013971361,"request":0.001,"image":0.0,"web_search":2.4773676323536766,"internal_reasoning":0.0,"input_cache_read":2.4773676323536764e-05,"input_cache_write":0.0,"max_prompt_cost":208.09888111770886,"max_completion_cost":7.803708041914083,"max_cost":212.7811059428573},"per_request_limits":null,"top_provider":{"context_length":2000000,"max_completion_tokens":30000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"x-ai/grok-4-fast","alias_ids":null,"forwarded_model_id":null},{"id":"tongyi-deepresearch-30b-a3b","name":"Tongyi DeepResearch 30B A3B","created":1758210804,"description":"Tongyi DeepResearch is an agentic large language model developed by Tongyi Lab, with 30 billion total parameters activating only 3 billion per token. It's optimized for long-horizon, deep information-seeking tasks...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":3.7705500000000006e-08,"completion":1.885275e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":3.591e-08,"input_cache_write":0.0,"max_prompt_cost":0.004942135296000001,"max_completion_cost":0.02471067648,"max_cost":0.02471067648},"sats_pricing":{"prompt":4.6822248251484494e-05,"completion":0.00023411124125742241,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":4.459261738236618e-05,"input_cache_write":0.0,"max_prompt_cost":6.137085722818576,"max_completion_cost":30.68542861409287,"max_cost":30.68542861409287},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":131072,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"alibaba/tongyi-deepresearch-30b-a3b","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3-coder-flash","name":"Qwen: Qwen3 Coder Flash","created":1758115536,"description":"Qwen3 Coder Flash is Alibaba's fast and cost efficient version of their proprietary Qwen3 Coder Plus. It is a powerful coding agent model specializing in autonomous programming via tool calling...","context_length":1000000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":8.169525e-08,"completion":4.0847625e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.5561e-08,"input_cache_write":9.725625e-08,"max_prompt_cost":0.08169525,"max_completion_cost":0.02676989952,"max_cost":0.103111169616},"sats_pricing":{"prompt":0.00010144820454488305,"completion":0.0005072410227244153,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.9323467532358675e-05,"input_cache_write":0.00012077167207724173,"max_prompt_cost":101.44820454488304,"max_completion_cost":33.24254766526728,"max_cost":128.04224267709688},"per_request_limits":null,"top_provider":{"context_length":1000000,"max_completion_tokens":65536,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3-coder-flash","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3-next-80b-a3b-thinking","name":"Qwen: Qwen3 Next 80B A3B Thinking","created":1757612284,"description":"Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default. It’s designed for hard multi-step problems; math proofs, code synthesis/debugging, logic, and agentic...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":4.0847625e-08,"completion":3.26781e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.005353979904,"max_completion_cost":0.010707959808,"max_cost":0.014723444736},"sats_pricing":{"prompt":5.0724102272441524e-05,"completion":0.0004057928181795322,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":6.648509533053455,"max_completion_cost":13.29701906610691,"max_cost":18.283401215897},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":32768,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3-next-80b-a3b-thinking-2509","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3-next-80b-a3b-instruct","name":"Qwen: Qwen3 Next 80B A3B Instruct","created":1757612213,"description":"Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":3.7705500000000006e-08,"completion":4.60845e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.009884270592000002,"max_completion_cost":0.00755048448,"max_cost":0.01681698816},"sats_pricing":{"prompt":4.6822248251484494e-05,"completion":0.0005722719230736992,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":12.274171445637151,"max_completion_cost":9.376103187639488,"max_cost":20.883138917924317},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3-next-80b-a3b-instruct-2509","alias_ids":null,"forwarded_model_id":null},{"id":"qwen-plus-2025-07-28:thinking","name":"Qwen: Qwen Plus 0728 (thinking)","created":1757347599,"description":"Qwen Plus 0728, based on the Qwen3 foundation model, is a 1 million context hybrid reasoning model with a balanced performance, speed, and cost combination.","context_length":1000000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":1.0892700000000002e-07,"completion":3.26781e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":1.29675e-07,"max_prompt_cost":0.10892700000000002,"max_completion_cost":0.010707959808,"max_cost":0.11606563987200003},"sats_pricing":{"prompt":0.00013526427272651077,"completion":0.0004057928181795322,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.00016102889610298898,"max_prompt_cost":135.26427272651077,"max_completion_cost":13.29701906610691,"max_cost":144.12895210391537},"per_request_limits":null,"top_provider":{"context_length":1000000,"max_completion_tokens":32768,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen-plus-2025-07-28","alias_ids":null,"forwarded_model_id":null},{"id":"qwen-plus-2025-07-28","name":"Qwen: Qwen Plus 0728","created":1757347599,"description":"Qwen Plus 0728, based on the Qwen3 foundation model, is a 1 million context hybrid reasoning model with a balanced performance, speed, and cost combination.","context_length":1000000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":1.0892700000000002e-07,"completion":3.26781e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":1.29675e-07,"max_prompt_cost":0.10892700000000002,"max_completion_cost":0.010707959808,"max_cost":0.11606563987200003},"sats_pricing":{"prompt":0.00013526427272651077,"completion":0.0004057928181795322,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.00016102889610298898,"max_prompt_cost":135.26427272651077,"max_completion_cost":13.29701906610691,"max_cost":144.12895210391537},"per_request_limits":null,"top_provider":{"context_length":1000000,"max_completion_tokens":32768,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen-plus-2025-07-28","alias_ids":null,"forwarded_model_id":null},{"id":"nemotron-nano-9b-v2","name":"NVIDIA: Nemotron Nano 9B V2","created":1757106807,"description":"NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":1.6758000000000004e-08,"completion":6.703200000000001e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0021965045760000005,"max_completion_cost":0.0010982522880000002,"max_cost":0.0030201937920000007},"sats_pricing":{"prompt":2.0809888111770886e-05,"completion":8.323955244708354e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":2.7275936545860335,"max_completion_cost":1.3637968272930168,"max_cost":3.7504412750557963},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"nvidia/nemotron-nano-9b-v2","alias_ids":null,"forwarded_model_id":null},{"id":"kimi-k2-0905","name":"MoonshotAI: Kimi K2 0905","created":1757021147,"description":"Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":1.6758000000000002e-07,"completion":8.379000000000002e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.043930091520000006,"max_completion_cost":0.21965045760000004,"max_cost":0.21965045760000004},"sats_pricing":{"prompt":0.00020809888111770884,"completion":0.0010404944055885444,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":54.551873091720665,"max_completion_cost":272.7593654586034,"max_cost":272.7593654586034},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":262144,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"moonshotai/kimi-k2-0905","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3-30b-a3b-thinking-2507","name":"Qwen: Qwen3 30B A3B Thinking 2507","created":1756399192,"description":"Qwen3-30B-A3B-Thinking-2507 is a 30B parameter Mixture-of-Experts reasoning model optimized for complex tasks requiring extended multi-step thinking. The model is designed specifically for “thinking mode,” where internal reasoning traces are separated...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":3.351600000000001e-08,"completion":1.6758000000000002e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":3.1920000000000004e-08,"input_cache_write":0.0,"max_prompt_cost":0.004393009152000001,"max_completion_cost":0.021965045760000003,"max_cost":0.021965045760000003},"sats_pricing":{"prompt":4.161977622354177e-05,"completion":0.00020809888111770884,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":3.9637882117658825e-05,"input_cache_write":0.0,"max_prompt_cost":5.455187309172067,"max_completion_cost":27.275936545860333,"max_cost":27.275936545860333},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":131072,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3-30b-a3b-thinking-2507","alias_ids":null,"forwarded_model_id":null},{"id":"grok-code-fast-1","name":"xAI: Grok Code Fast 1","created":1756238927,"description":"Grok Code Fast 1 is a speedy and economical reasoning model that excels at agentic coding. With reasoning traces visible in the response, developers can steer Grok Code for high-quality...","context_length":256000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Grok","instruct_type":null},"pricing":{"prompt":8.379000000000001e-08,"completion":6.28425e-07,"request":0.0,"image":0.0,"web_search":0.0019950000000000002,"internal_reasoning":0.0,"input_cache_read":7.980000000000001e-09,"input_cache_write":0.0,"max_prompt_cost":0.021450240000000002,"max_completion_cost":0.006284250000000001,"max_cost":0.026896590000000005},"sats_pricing":{"prompt":0.00010404944055885442,"completion":0.0007803708041914081,"request":0.001,"image":0.0,"web_search":2.4773676323536766,"internal_reasoning":0.0,"input_cache_read":9.909470529414706e-06,"input_cache_write":0.0,"max_prompt_cost":26.63665678306673,"max_completion_cost":7.803708041914081,"max_cost":33.39987041939227},"per_request_limits":null,"top_provider":{"context_length":256000,"max_completion_tokens":10000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"x-ai/grok-code-fast-1","alias_ids":null,"forwarded_model_id":null},{"id":"hermes-4-70b","name":"Nous: Hermes 4 70B","created":1756236182,"description":"Hermes 4 70B is a hybrid reasoning model from Nous Research, built on Meta-Llama-3.1-70B. It introduces the same hybrid mode as the larger 405B release, allowing the model to either...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":null},"pricing":{"prompt":5.446350000000001e-08,"completion":1.6758000000000002e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.007138639872000001,"max_completion_cost":0.021965045760000003,"max_cost":0.021965045760000003},"sats_pricing":{"prompt":6.763213636325538e-05,"completion":0.00020809888111770884,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":8.86467937740461,"max_completion_cost":27.275936545860333,"max_cost":27.275936545860333},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"nousresearch/hermes-4-70b","alias_ids":null,"forwarded_model_id":null},{"id":"hermes-4-405b","name":"Nous: Hermes 4 405B","created":1756235463,"description":"Hermes 4 is a large-scale reasoning model built on Meta-Llama-3.1-405B and released by Nous Research. It introduces a hybrid reasoning mode, where the model can choose to deliberate internally with...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":4.189500000000001e-07,"completion":1.25685e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.05491261440000001,"max_completion_cost":0.1647378432,"max_cost":0.1647378432},"sats_pricing":{"prompt":0.0005202472027942722,"completion":0.0015607416083828162,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":68.18984136465085,"max_completion_cost":204.56952409395248,"max_cost":204.56952409395248},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"nousresearch/hermes-4-405b","alias_ids":null,"forwarded_model_id":null},{"id":"deepseek-chat-v3.1","name":"DeepSeek: DeepSeek V3.1","created":1755779628,"description":"DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context...","context_length":32768,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":"deepseek-v3.1"},"pricing":{"prompt":6.28425e-08,"completion":3.142125e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.00205922304,"max_completion_cost":0.0022522752000000003,"max_cost":0.0038610432000000003},"sats_pricing":{"prompt":7.803708041914081e-05,"completion":0.00039018540209570404,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":2.557119051174406,"max_completion_cost":2.7968489622220067,"max_cost":4.794598220952012},"per_request_limits":null,"top_provider":{"context_length":32768,"max_completion_tokens":7168,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"deepseek/deepseek-chat-v3.1","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-4o-audio-preview","name":"OpenAI: GPT-4o Audio","created":1755233061,"description":"The gpt-4o-audio-preview model adds support for audio inputs as prompts. This enhancement allows the model to detect nuances within audio recordings and add depth to generated user experiences. Audio outputs...","context_length":128000,"architecture":{"modality":"text+audio->text+audio","input_modalities":["audio","text"],"output_modalities":["text","audio"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":1.047375e-06,"completion":4.1895e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.13406400000000002,"max_completion_cost":0.068640768,"max_cost":0.18554457600000002},"sats_pricing":{"prompt":0.0013006180069856802,"completion":0.005202472027942721,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":166.47910489416708,"max_completion_cost":85.23730170581354,"max_cost":230.40708117352722},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-4o-audio-preview","alias_ids":null,"forwarded_model_id":null},{"id":"mistral-medium-3.1","name":"Mistral: Mistral Medium 3.1","created":1755095639,"description":"Mistral Medium 3.1 is an updated version of Mistral Medium 3, which is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at significantly reduced operational cost. It balances...","context_length":131072,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":1.6758000000000002e-07,"completion":8.379000000000002e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.5960000000000002e-08,"input_cache_write":0.0,"max_prompt_cost":0.021965045760000003,"max_completion_cost":0.10982522880000002,"max_cost":0.10982522880000002},"sats_pricing":{"prompt":0.00020809888111770884,"completion":0.0010404944055885444,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.9818941058829413e-05,"input_cache_write":0.0,"max_prompt_cost":27.275936545860333,"max_completion_cost":136.3796827293017,"max_cost":136.3796827293017},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"mistralai/mistral-medium-3.1","alias_ids":null,"forwarded_model_id":null},{"id":"ernie-4.5-21b-a3b","name":"Baidu: ERNIE 4.5 21B A3B","created":1755034167,"description":"A sophisticated text-based Mixture-of-Experts (MoE) model featuring 21B total parameters with 3B activated per token, delivering exceptional multimodal understanding and generation through heterogeneous MoE structures and modality-isolated routing. Supporting an...","context_length":120000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":2.9326499999999998e-08,"completion":1.1730599999999999e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.00351918,"max_completion_cost":0.0009384479999999999,"max_cost":0.004223016},"sats_pricing":{"prompt":3.641730419559904e-05,"completion":0.00014566921678239617,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":4.370076503471885,"max_completion_cost":1.1653537342591693,"max_cost":5.244091804166262},"per_request_limits":null,"top_provider":{"context_length":120000,"max_completion_tokens":8000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"baidu/ernie-4.5-21b-a3b","alias_ids":null,"forwarded_model_id":null},{"id":"ernie-4.5-vl-28b-a3b","name":"Baidu: ERNIE 4.5 VL 28B A3B","created":1755032836,"description":"A powerful multimodal Mixture-of-Experts chat model featuring 28B total parameters with 3B activated per token, delivering exceptional text and vision understanding through its innovative heterogeneous MoE structure with modality-isolated routing....","context_length":30000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":5.8652999999999996e-08,"completion":2.3461199999999998e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.00175959,"max_completion_cost":0.0018768959999999999,"max_cost":0.0031672619999999997},"sats_pricing":{"prompt":7.283460839119808e-05,"completion":0.00029133843356479234,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":2.1850382517359423,"max_completion_cost":2.3307074685183387,"max_cost":3.9330688531246962},"per_request_limits":null,"top_provider":{"context_length":30000,"max_completion_tokens":8000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"baidu/ernie-4.5-vl-28b-a3b","alias_ids":null,"forwarded_model_id":null},{"id":"glm-4.5v","name":"Z.ai: GLM 4.5V","created":1754922288,"description":"GLM-4.5V is a vision-language foundation model for multimodal agent applications. Built on a Mixture-of-Experts (MoE) architecture with 106B parameters and 12B activated parameters, it achieves state-of-the-art results in video understanding,...","context_length":65536,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":2.5137e-07,"completion":7.5411e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":4.3890000000000005e-08,"input_cache_write":0.0,"max_prompt_cost":0.01647378432,"max_completion_cost":0.01235533824,"max_cost":0.02471067648},"sats_pricing":{"prompt":0.00031214832167656326,"completion":0.0009364449650296897,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":5.450208791178089e-05,"input_cache_write":0.0,"max_prompt_cost":20.45695240939525,"max_completion_cost":15.342714307046435,"max_cost":30.68542861409287},"per_request_limits":null,"top_provider":{"context_length":65536,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"z-ai/glm-4.5v","alias_ids":null,"forwarded_model_id":null},{"id":"jamba-large-1.7","name":"AI21: Jamba Large 1.7","created":1754669020,"description":"Jamba Large 1.7 is the latest model in the Jamba open family, offering improvements in grounding, instruction-following, and overall efficiency. Built on a hybrid SSM-Transformer architecture with a 256K context...","context_length":256000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":8.379000000000002e-07,"completion":3.3516000000000007e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.21450240000000004,"max_completion_cost":0.013728153600000003,"max_cost":0.22479851520000005},"sats_pricing":{"prompt":0.0010404944055885444,"completion":0.004161977622354178,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":266.36656783066735,"max_completion_cost":17.04746034116271,"max_cost":279.15216308653936},"per_request_limits":null,"top_provider":{"context_length":256000,"max_completion_tokens":4096,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"ai21/jamba-large-1.7","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-5-chat","name":"OpenAI: GPT-5 Chat","created":1754587837,"description":"GPT-5 Chat is designed for advanced, natural, multimodal, and context-aware conversations for enterprise applications.","context_length":128000,"architecture":{"modality":"text+image+file->text","input_modalities":["file","image","text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":5.236875e-07,"completion":4.1895e-06,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":4.9875e-08,"input_cache_write":0.0,"max_prompt_cost":0.06703200000000001,"max_completion_cost":0.068640768,"max_cost":0.12709267200000002},"sats_pricing":{"prompt":0.0006503090034928401,"completion":0.005202472027942721,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":6.193419080884192e-05,"input_cache_write":0.0,"max_prompt_cost":83.23955244708354,"max_completion_cost":85.23730170581354,"max_cost":157.82219143967038},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-5-chat-2025-08-07","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-5","name":"OpenAI: GPT-5","created":1754587413,"description":"GPT-5 is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy...","context_length":400000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":5.236875e-07,"completion":4.1895e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":4.9875e-08,"input_cache_write":0.0,"max_prompt_cost":0.20947500000000002,"max_completion_cost":0.5362560000000001,"max_cost":0.678699},"sats_pricing":{"prompt":0.0006503090034928401,"completion":0.005202472027942721,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":6.193419080884192e-05,"input_cache_write":0.0,"max_prompt_cost":260.12360139713604,"max_completion_cost":665.9164195766683,"max_cost":842.8004685267208},"per_request_limits":null,"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-5-2025-08-07","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-5-mini","name":"OpenAI: GPT-5 Mini","created":1754587407,"description":"GPT-5 Mini is a compact version of GPT-5, designed to handle lighter-weight reasoning tasks. It provides the same instruction-following and safety-tuning benefits as GPT-5, but with reduced latency and cost....","context_length":400000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":1.0473750000000002e-07,"completion":8.379000000000002e-07,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":9.975e-09,"input_cache_write":0.0,"max_prompt_cost":0.04189500000000001,"max_completion_cost":0.10725120000000002,"max_cost":0.13573980000000002},"sats_pricing":{"prompt":0.00013006180069856805,"completion":0.0010404944055885444,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":1.2386838161768382e-05,"input_cache_write":0.0,"max_prompt_cost":52.024720279427214,"max_completion_cost":133.18328391533367,"max_cost":168.56009370534417},"per_request_limits":null,"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-5-mini-2025-08-07","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-5-nano","name":"OpenAI: GPT-5 Nano","created":1754587402,"description":"GPT-5-Nano is the smallest and fastest variant in the GPT-5 system, optimized for developer tools, rapid interactions, and ultra-low latency environments. While limited in reasoning depth compared to its larger...","context_length":400000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":2.0947500000000003e-08,"completion":1.6758000000000002e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":3.9900000000000005e-09,"input_cache_write":0.0,"max_prompt_cost":0.008379000000000001,"max_completion_cost":0.06703200000000001,"max_cost":0.06703200000000001},"sats_pricing":{"prompt":2.6012360139713605e-05,"completion":0.00020809888111770884,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":4.954735264707353e-06,"input_cache_write":0.0,"max_prompt_cost":10.404944055885442,"max_completion_cost":83.23955244708354,"max_cost":83.23955244708354},"per_request_limits":null,"top_provider":{"context_length":400000,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-5-nano-2025-08-07","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-oss-120b","name":"GPT-OSS 120B (Private via TEE)","created":1777854613,"description":"PPQ.AI model","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Unknown","instruct_type":null},"pricing":{"prompt":3.142125000000001e-07,"completion":5.236875e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.04118446080000001,"max_completion_cost":0.068640768,"max_cost":0.068640768},"sats_pricing":{"prompt":0.00039018540209570415,"completion":0.0006503090034928401,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":51.142381023488134,"max_completion_cost":85.23730170581354,"max_cost":85.23730170581354},"per_request_limits":null,"top_provider":null,"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":null,"alias_ids":null,"forwarded_model_id":null},{"id":"gpt-oss-20b","name":"OpenAI: gpt-oss-20b","created":1754414229,"description":"gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":1.25685e-08,"completion":5.8652999999999996e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.001647378432,"max_completion_cost":0.0076877660159999995,"max_cost":0.0076877660159999995},"sats_pricing":{"prompt":1.560741608382816e-05,"completion":7.283460839119808e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":2.0456952409395246,"max_completion_cost":9.546577791051115,"max_cost":9.546577791051115},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":131072,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-oss-20b","alias_ids":null,"forwarded_model_id":null},{"id":"claude-opus-4.1","name":"Anthropic: Claude Opus 4.1","created":1754411591,"description":"Claude Opus 4.1 is an updated version of Anthropic’s flagship model, offering improved performance in coding, reasoning, and agentic tasks. It achieves 74.5% on SWE-bench Verified and shows notable gains...","context_length":200000,"architecture":{"modality":"text+image+file->text","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":6.28425e-06,"completion":3.142125e-05,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":5.985000000000001e-07,"input_cache_write":7.48125e-06,"max_prompt_cost":1.25685,"max_completion_cost":1.00548,"max_cost":2.061234},"sats_pricing":{"prompt":0.007803708041914081,"completion":0.03901854020957041,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":0.000743210289706103,"input_cache_write":0.009290128621326285,"max_prompt_cost":1560.7416083828161,"max_completion_cost":1248.593286706253,"max_cost":2559.616237747818},"per_request_limits":null,"top_provider":{"context_length":200000,"max_completion_tokens":32000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"anthropic/claude-4.1-opus-20250805","alias_ids":null,"forwarded_model_id":null},{"id":"codestral-2508","name":"Mistral: Codestral 2508","created":1754079630,"description":"Mistral's cutting-edge language model for coding released end of July 2025. Codestral specializes in low-latency, high-frequency tasks such as fill-in-the-middle (FIM), code correction and test generation.\n\n[Blog Post](https://mistral.ai/news/codestral-25-08)","context_length":256000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":1.25685e-07,"completion":3.77055e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.1969999999999999e-08,"input_cache_write":0.0,"max_prompt_cost":0.03217536,"max_completion_cost":0.09652608,"max_cost":0.09652608},"sats_pricing":{"prompt":0.00015607416083828163,"completion":0.00046822248251484483,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.4864205794122058e-05,"input_cache_write":0.0,"max_prompt_cost":39.9549851746001,"max_completion_cost":119.86495552380028,"max_cost":119.86495552380028},"per_request_limits":null,"top_provider":{"context_length":256000,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"mistralai/codestral-2508","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3-coder-30b-a3b-instruct","name":"Qwen: Qwen3 Coder 30B A3B Instruct","created":1753972379,"description":"Qwen3-Coder-30B-A3B-Instruct is a 30.5B parameter Mixture-of-Experts (MoE) model with 128 experts (8 active per forward pass), designed for advanced code generation, repository-scale understanding, and agentic tool use. Built on the...","context_length":160000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":2.9326499999999998e-08,"completion":1.1311649999999999e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.004692239999999999,"max_completion_cost":0.0037066014719999998,"max_cost":0.00743787072},"sats_pricing":{"prompt":3.641730419559904e-05,"completion":0.00014046674475445345,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":5.826768671295846,"max_completion_cost":4.602814292113931,"max_cost":9.236260739528388},"per_request_limits":null,"top_provider":{"context_length":160000,"max_completion_tokens":32768,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3-coder-30b-a3b-instruct","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3-30b-a3b-instruct-2507","name":"Qwen: Qwen3 30B A3B Instruct 2507","created":1753806965,"description":"Qwen3-30B-A3B-Instruct-2507 is a 30.5B-parameter mixture-of-experts language model from Qwen, with 3.3B active parameters per inference. It operates in non-thinking mode and is designed for high-quality instruction following, multilingual understanding, and...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":3.7705500000000006e-08,"completion":1.25685e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.009884270592000002,"max_completion_cost":0.03294756864,"max_cost":0.03294756864},"sats_pricing":{"prompt":4.6822248251484494e-05,"completion":0.00015607416083828163,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":12.274171445637151,"max_completion_cost":40.9139048187905,"max_cost":40.9139048187905},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":262144,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3-30b-a3b-instruct-2507","alias_ids":null,"forwarded_model_id":null},{"id":"glm-4.5","name":"Z.ai: GLM 4.5","created":1753471347,"description":"GLM-4.5 is our latest flagship foundation model, purpose-built for agent-based applications. It leverages a Mixture-of-Experts (MoE) architecture and supports a context length of up to 128k tokens. GLM-4.5 delivers significantly...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":2.5137e-07,"completion":9.2169e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":4.3890000000000005e-08,"input_cache_write":0.0,"max_prompt_cost":0.03294756864,"max_completion_cost":0.09060581376,"max_cost":0.09884270592},"sats_pricing":{"prompt":0.00031214832167656326,"completion":0.0011445438461473985,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":5.450208791178089e-05,"input_cache_write":0.0,"max_prompt_cost":40.9139048187905,"max_completion_cost":112.51323825167387,"max_cost":122.74171445637148},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":98304,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"z-ai/glm-4.5","alias_ids":null,"forwarded_model_id":null},{"id":"glm-4.5-air","name":"Z.ai: GLM 4.5 Air","created":1753471258,"description":"GLM-4.5-Air is the lightweight variant of our latest flagship model family, also purpose-built for agent-centric applications. Like GLM-4.5, it adopts the Mixture-of-Experts (MoE) architecture but with a more compact parameter...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":5.446350000000001e-08,"completion":3.561075e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":9.975e-09,"input_cache_write":0.0,"max_prompt_cost":0.007138639872000001,"max_completion_cost":0.03500679168,"max_cost":0.036791451648000004},"sats_pricing":{"prompt":6.763213636325538e-05,"completion":0.00044221012237513125,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.2386838161768382e-05,"input_cache_write":0.0,"max_prompt_cost":8.86467937740461,"max_completion_cost":43.4710238699649,"max_cost":45.68719371431606},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":98304,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"z-ai/glm-4.5-air","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3-235b-a22b-thinking-2507","name":"Qwen: Qwen3 235B A22B Thinking 2507","created":1753449557,"description":"Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":"qwen3"},"pricing":{"prompt":6.263302500000001e-08,"completion":6.2633025e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.008209435852800002,"max_completion_cost":0.082094358528,"max_cost":0.082094358528},"sats_pricing":{"prompt":7.777695681774368e-05,"completion":0.0007777695681774367,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":10.1943812840153,"max_completion_cost":101.94381284015299,"max_cost":101.94381284015299},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3-235b-a22b-thinking-2507","alias_ids":null,"forwarded_model_id":null},{"id":"glm-4-32b","name":"Z.ai: GLM 4 32B ","created":1753376617,"description":"GLM 4 32B is a cost-effective foundation language model. It can efficiently perform complex tasks and has significantly enhanced capabilities in tool use, online search, and code-related intelligent tasks. It...","context_length":128000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":4.1895000000000006e-08,"completion":4.1895000000000006e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.005362560000000001,"max_completion_cost":0.005362560000000001,"max_cost":0.005362560000000001},"sats_pricing":{"prompt":5.202472027942721e-05,"completion":5.202472027942721e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":6.6591641957666825,"max_completion_cost":6.6591641957666825,"max_cost":6.6591641957666825},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"z-ai/glm-4-32b-0414","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3-coder","name":"Qwen: Qwen3 Coder 480B A35B","created":1753230546,"description":"Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":9.216900000000001e-08,"completion":7.5411e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.024161550336000003,"max_completion_cost":0.04942135296,"max_cost":0.067542515712},"sats_pricing":{"prompt":0.00011445438461473986,"completion":0.0009364449650296897,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":30.003530200446367,"max_completion_cost":61.37085722818574,"max_cost":83.8735048785205},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":65536,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3-coder-480b-a35b-07-25","alias_ids":null,"forwarded_model_id":null},{"id":"ui-tars-1.5-7b","name":"ByteDance: UI-TARS 7B ","created":1753205056,"description":"UI-TARS-1.5 is a multimodal vision-language agent optimized for GUI-based environments, including desktop interfaces, web browsers, mobile systems, and games. Built by ByteDance, it builds upon the UI-TARS framework with reinforcement...","context_length":128000,"architecture":{"modality":"text+image->text","input_modalities":["image","text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":4.1895000000000006e-08,"completion":8.379000000000001e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":3.99e-08,"input_cache_write":0.0,"max_prompt_cost":0.005362560000000001,"max_completion_cost":0.00017160192000000002,"max_cost":0.005448360960000001},"sats_pricing":{"prompt":5.202472027942721e-05,"completion":0.00010404944055885442,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":4.954735264707353e-05,"input_cache_write":0.0,"max_prompt_cost":6.6591641957666825,"max_completion_cost":0.21309325426453385,"max_cost":6.76571082289895},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":2048,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"bytedance/ui-tars-1.5-7b","alias_ids":null,"forwarded_model_id":null},{"id":"gemini-2.5-flash-lite","name":"Google: Gemini 2.5 Flash Lite","created":1753200276,"description":"Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. It offers improved throughput, faster token generation, and better performance...","context_length":1048576,"architecture":{"modality":"text+image+file+audio+video->text","input_modalities":["text","image","file","audio","video"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":2.7930000000000003e-08,"completion":1.1172000000000001e-07,"request":0.0,"image":3.99e-08,"web_search":0.005586000000000001,"internal_reasoning":1.596e-07,"input_cache_read":3.9900000000000005e-09,"input_cache_write":3.325e-08,"max_prompt_cost":0.029286727680000003,"max_completion_cost":0.0073215702,"max_cost":0.034777905330000004},"sats_pricing":{"prompt":3.468314685295147e-05,"completion":0.00013873258741180588,"request":0.001,"image":4.954735264707353e-05,"web_search":6.936629370590294,"internal_reasoning":0.00019818941058829411,"input_cache_read":4.954735264707353e-06,"input_cache_write":4.1289460539227946e-05,"max_prompt_cost":36.36791539448044,"max_completion_cost":9.0918401160327,"max_cost":43.18679548150497},"per_request_limits":null,"top_provider":{"context_length":1048576,"max_completion_tokens":65535,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"google/gemini-2.5-flash-lite","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3-235b-a22b-2507","name":"Qwen: Qwen3 235B A22B Instruct 2507","created":1753119555,"description":"Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following,...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":2.9745450000000004e-08,"completion":4.1895000000000006e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.007797591244800001,"max_completion_cost":0.0006864076800000001,"max_cost":0.007996649472000001},"sats_pricing":{"prompt":3.6937551398393323e-05,"completion":5.202472027942721e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":9.68295747378042,"max_completion_cost":0.8523730170581354,"max_cost":9.930145648727278},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3-235b-a22b-07-25","alias_ids":null,"forwarded_model_id":null},{"id":"router","name":"Switchpoint Router","created":1752272899,"description":"Switchpoint AI's router instantly analyzes your request and directs it to the optimal AI from an ever-evolving library. As the world of LLMs advances, our router gets smarter, ensuring you...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":3.561075e-07,"completion":1.42443e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.04667572224,"max_completion_cost":0.18670288896,"max_cost":0.18670288896},"sats_pricing":{"prompt":0.00044221012237513125,"completion":0.001768840489500525,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":57.9613651599532,"max_completion_cost":231.8454606398128,"max_cost":231.8454606398128},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"switchpoint/router","alias_ids":null,"forwarded_model_id":null},{"id":"kimi-k2","name":"MoonshotAI: Kimi K2 0711","created":1752263252,"description":"Kimi K2 Instruct is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It is optimized for...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":2.3880150000000004e-07,"completion":9.635850000000002e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.031300190208000005,"max_completion_cost":0.031574753280000006,"max_cost":0.05504989593600001},"sats_pricing":{"prompt":0.0002965409055927351,"completion":0.0011965685664268259,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":38.868209577850976,"max_completion_cost":39.20915878467423,"max_cost":68.36031596806247},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":32768,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"moonshotai/kimi-k2","alias_ids":null,"forwarded_model_id":null},{"id":"devstral-medium","name":"Mistral: Devstral Medium","created":1752161321,"description":"Devstral Medium is a high-performance code generation and agentic reasoning model developed jointly by Mistral AI and All Hands AI. Positioned as a step up from Devstral Small, it achieves...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":1.6758000000000002e-07,"completion":8.379000000000002e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.5960000000000002e-08,"input_cache_write":0.0,"max_prompt_cost":0.021965045760000003,"max_completion_cost":0.10982522880000002,"max_cost":0.10982522880000002},"sats_pricing":{"prompt":0.00020809888111770884,"completion":0.0010404944055885444,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.9818941058829413e-05,"input_cache_write":0.0,"max_prompt_cost":27.275936545860333,"max_completion_cost":136.3796827293017,"max_cost":136.3796827293017},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"mistralai/devstral-medium-2507","alias_ids":null,"forwarded_model_id":null},{"id":"devstral-small","name":"Mistral: Devstral Small 1.1","created":1752160751,"description":"Devstral Small 1.1 is a 24B parameter open-weight language model for software engineering agents, developed by Mistral AI in collaboration with All Hands AI. Finetuned from Mistral Small 3.1 and...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":4.1895000000000006e-08,"completion":1.25685e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":3.9900000000000005e-09,"input_cache_write":0.0,"max_prompt_cost":0.005491261440000001,"max_completion_cost":0.01647378432,"max_cost":0.01647378432},"sats_pricing":{"prompt":5.202472027942721e-05,"completion":0.00015607416083828163,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":4.954735264707353e-06,"input_cache_write":0.0,"max_prompt_cost":6.818984136465083,"max_completion_cost":20.45695240939525,"max_cost":20.45695240939525},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"mistralai/devstral-small-2507","alias_ids":null,"forwarded_model_id":null},{"id":"dolphin-mistral-24b-venice-edition","name":"Venice: Uncensored","created":1752094966,"description":"PPQ.AI model","context_length":32768,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Unknown","instruct_type":null},"pricing":{"prompt":1.25685e-07,"completion":4.189500000000001e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.00411844608,"max_completion_cost":0.013728153600000003,"max_cost":0.013728153600000003},"sats_pricing":{"prompt":0.00015607416083828163,"completion":0.0005202472027942722,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":5.114238102348812,"max_completion_cost":17.04746034116271,"max_cost":17.04746034116271},"per_request_limits":null,"top_provider":null,"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":null,"alias_ids":null,"forwarded_model_id":null},{"id":"grok-4","name":"xAI: Grok 4","created":1752087689,"description":"Grok 4 is xAI's latest reasoning model with a 256k context window. It supports parallel tool calling, structured outputs, and both image and text inputs. Note that reasoning is not...","context_length":256000,"architecture":{"modality":"text+image+file->text","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"Grok","instruct_type":null},"pricing":{"prompt":1.25685e-06,"completion":6.28425e-06,"request":0.0,"image":0.0,"web_search":0.0019950000000000002,"internal_reasoning":0.0,"input_cache_read":2.9925000000000004e-07,"input_cache_write":0.0,"max_prompt_cost":0.32175360000000003,"max_completion_cost":1.608768,"max_cost":1.608768},"sats_pricing":{"prompt":0.0015607416083828162,"completion":0.007803708041914081,"request":0.001,"image":0.0,"web_search":2.4773676323536766,"internal_reasoning":0.0,"input_cache_read":0.0003716051448530515,"input_cache_write":0.0,"max_prompt_cost":399.54985174600097,"max_completion_cost":1997.7492587300046,"max_cost":1997.7492587300046},"per_request_limits":null,"top_provider":{"context_length":256000,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"x-ai/grok-4-07-09","alias_ids":null,"forwarded_model_id":null},{"id":"gemma-3n-e2b-it","name":"Gemma 3n 2B","created":1752074904,"description":"PPQ.AI model","context_length":8192,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Unknown","instruct_type":null},"pricing":{"prompt":1.25685e-07,"completion":4.189500000000001e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.00102961152,"max_completion_cost":0.0034320384000000007,"max_cost":0.0034320384000000007},"sats_pricing":{"prompt":0.00015607416083828163,"completion":0.0005202472027942722,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":1.278559525587203,"max_completion_cost":4.261865085290678,"max_cost":4.261865085290678},"per_request_limits":null,"top_provider":null,"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":null,"alias_ids":null,"forwarded_model_id":null},{"id":"hunyuan-a13b-instruct","name":"Tencent: Hunyuan A13B Instruct","created":1751987664,"description":"Hunyuan-A13B is a 13B active parameter Mixture-of-Experts (MoE) language model developed by Tencent, with a total parameter count of 80B and support for reasoning via Chain-of-Thought. It offers competitive benchmark...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":5.8652999999999996e-08,"completion":2.3880150000000004e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0076877660159999995,"max_completion_cost":0.031300190208000005,"max_cost":0.031300190208000005},"sats_pricing":{"prompt":7.283460839119808e-05,"completion":0.0002965409055927351,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":9.546577791051115,"max_completion_cost":38.868209577850976,"max_cost":38.868209577850976},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":131072,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"tencent/hunyuan-a13b-instruct","alias_ids":null,"forwarded_model_id":null},{"id":"deepseek-r1t2-chimera","name":"TNG: DeepSeek R1T2 Chimera","created":1751986985,"description":"DeepSeek-TNG-R1T2-Chimera is the second-generation Chimera model from TNG Tech. It is a 671 B-parameter mixture-of-experts text-generation model assembled from DeepSeek-AI’s R1-0528, R1, and V3-0324 checkpoints with an Assembly-of-Experts merge. The...","context_length":163840,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":null},"pricing":{"prompt":1.25685e-07,"completion":4.60845e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":5.985e-08,"input_cache_write":0.0,"max_prompt_cost":0.0205922304,"max_completion_cost":0.0755048448,"max_cost":0.0755048448},"sats_pricing":{"prompt":0.00015607416083828163,"completion":0.0005722719230736992,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":7.432102897061029e-05,"input_cache_write":0.0,"max_prompt_cost":25.57119051174406,"max_completion_cost":93.76103187639488,"max_cost":93.76103187639488},"per_request_limits":null,"top_provider":{"context_length":163840,"max_completion_tokens":163840,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"tngtech/deepseek-r1t2-chimera","alias_ids":null,"forwarded_model_id":null},{"id":"morph-v3-large","name":"Morph: Morph V3 Large","created":1751910858,"description":"Morph's high-accuracy apply model for complex code edits. ~4,500 tokens/sec with 98% accuracy for precise code transformations. The model requires the prompt to be in the following format: <instruction>{instruction}</instruction> <code>{initial_code}</code>...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":3.77055e-07,"completion":7.96005e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.09884270592,"max_completion_cost":0.10433396736,"max_cost":0.15375532032},"sats_pricing":{"prompt":0.00046822248251484483,"completion":0.0009884696853091168,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":122.74171445637148,"max_completion_cost":129.56069859283656,"max_cost":190.9315558210223},"per_request_limits":null,"top_provider":{"context_length":262144,"max_completion_tokens":131072,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"morph/morph-v3-large","alias_ids":null,"forwarded_model_id":null},{"id":"morph-v3-fast","name":"Morph: Morph V3 Fast","created":1751910002,"description":"Morph's fastest apply model for code edits. ~10,500 tokens/sec with 96% accuracy for rapid code transformations. The model requires the prompt to be in the following format: <instruction>{instruction}</instruction> <code>{initial_code}</code> <update>{edit_snippet}</update>...","context_length":81920,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":3.3516000000000004e-07,"completion":5.0274e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.027456307200000002,"max_completion_cost":0.019104120000000002,"max_cost":0.03382434720000001},"sats_pricing":{"prompt":0.0004161977622354177,"completion":0.0006242966433531265,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":34.094920682325416,"max_completion_cost":23.723272447418807,"max_cost":42.002678164798354},"per_request_limits":null,"top_provider":{"context_length":81920,"max_completion_tokens":38000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"morph/morph-v3-fast","alias_ids":null,"forwarded_model_id":null},{"id":"ernie-4.5-vl-424b-a47b","name":"Baidu: ERNIE 4.5 VL 424B A47B ","created":1751300903,"description":"ERNIE-4.5-VL-424B-A47B is a multimodal Mixture-of-Experts (MoE) model from Baidu’s ERNIE 4.5 series, featuring 424B total parameters with 47B active per token. It is trained jointly on text and image data...","context_length":123000,"architecture":{"modality":"text+image->text","input_modalities":["image","text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":1.75959e-07,"completion":5.236875e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.021642957,"max_completion_cost":0.008379000000000001,"max_cost":0.027206613},"sats_pricing":{"prompt":0.00021850382517359424,"completion":0.0006503090034928401,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":26.875970496352096,"max_completion_cost":10.404944055885442,"max_cost":33.78485334946003},"per_request_limits":null,"top_provider":{"context_length":123000,"max_completion_tokens":16000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"baidu/ernie-4.5-vl-424b-a47b","alias_ids":null,"forwarded_model_id":null},{"id":"ernie-4.5-300b-a47b","name":"Baidu: ERNIE 4.5 300B A47B ","created":1751300139,"description":"ERNIE-4.5-300B-A47B is a 300B parameter Mixture-of-Experts (MoE) language model developed by Baidu as part of the ERNIE 4.5 series. It activates 47B parameters per token and supports text generation in...","context_length":123000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":1.1730599999999999e-07,"completion":4.60845e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.014428637999999999,"max_completion_cost":0.00553014,"max_cost":0.018551105999999998},"sats_pricing":{"prompt":0.00014566921678239617,"completion":0.0005722719230736992,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":17.91731366423473,"max_completion_cost":6.867263076884392,"max_cost":23.036546139730362},"per_request_limits":null,"top_provider":{"context_length":123000,"max_completion_tokens":12000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"baidu/ernie-4.5-300b-a47b","alias_ids":null,"forwarded_model_id":null},{"id":"mistral-small-3.2-24b-instruct","name":"Mistral: Mistral Small 3.2 24B","created":1750443016,"description":"Mistral-Small-3.2-24B-Instruct-2506 is an updated 24B parameter model from Mistral optimized for instruction following, repetition reduction, and improved function calling. Compared to the 3.1 release, version 3.2 significantly improves accuracy on...","context_length":128000,"architecture":{"modality":"text+image->text","input_modalities":["image","text"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":3.142125e-08,"completion":8.379000000000001e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.00402192,"max_completion_cost":0.0013728153600000002,"max_cost":0.0048799296},"sats_pricing":{"prompt":3.901854020957041e-05,"completion":0.00010404944055885442,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":4.994373146825012,"max_completion_cost":1.7047460341162708,"max_cost":6.059839418147681},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"mistralai/mistral-small-3.2-24b-instruct-2506","alias_ids":null,"forwarded_model_id":null},{"id":"minimax-m1","name":"MiniMax: MiniMax M1","created":1750200414,"description":"MiniMax-M1 is a large-scale, open-weight reasoning model designed for extended context and high-efficiency inference. It leverages a hybrid Mixture-of-Experts (MoE) architecture paired with a custom \"lightning attention\" mechanism, allowing it...","context_length":1000000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":1.6758000000000002e-07,"completion":9.2169e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.16758000000000003,"max_completion_cost":0.0368676,"max_cost":0.19774440000000001},"sats_pricing":{"prompt":0.00020809888111770884,"completion":0.0011445438461473985,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":208.09888111770886,"max_completion_cost":45.78175384589594,"max_cost":245.55667971889642},"per_request_limits":null,"top_provider":{"context_length":1000000,"max_completion_tokens":40000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"minimax/minimax-m1","alias_ids":null,"forwarded_model_id":null},{"id":"gemini-2.5-flash","name":"Google: Gemini 2.5 Flash","created":1750172488,"description":"Gemini 2.5 Flash is Google's state-of-the-art workhorse model, specifically designed for advanced reasoning, coding, mathematics, and scientific tasks. It includes built-in \"thinking\" capabilities, enabling it to provide responses with greater...","context_length":1048576,"architecture":{"modality":"text+image+file+audio+video->text","input_modalities":["file","image","text","audio","video"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":8.379000000000001e-08,"completion":6.9825e-07,"request":0.0,"image":1.197e-07,"web_search":0.005586000000000001,"internal_reasoning":9.975000000000002e-07,"input_cache_read":1.1969999999999999e-08,"input_cache_write":3.325e-08,"max_prompt_cost":0.08786018304000001,"max_completion_cost":0.04575981375,"max_cost":0.12812881914000002},"sats_pricing":{"prompt":0.00010404944055885442,"completion":0.0008670786713237868,"request":0.001,"image":0.00014864205794122058,"web_search":6.936629370590294,"internal_reasoning":0.0012386838161768385,"input_cache_read":1.4864205794122058e-05,"input_cache_write":4.1289460539227946e-05,"max_prompt_cost":109.10374618344133,"max_completion_cost":56.82400072520437,"max_cost":159.10886682162118},"per_request_limits":null,"top_provider":{"context_length":1048576,"max_completion_tokens":65535,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"google/gemini-2.5-flash","alias_ids":null,"forwarded_model_id":null},{"id":"gemini-2.5-pro","name":"Google: Gemini 2.5 Pro","created":1750169544,"description":"Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy...","context_length":1048576,"architecture":{"modality":"text+image+file+audio+video->text","input_modalities":["text","image","file","audio","video"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":3.49125e-07,"completion":2.793e-06,"request":0.0,"image":4.987500000000001e-07,"web_search":0.005586000000000001,"internal_reasoning":3.990000000000001e-06,"input_cache_read":4.9875e-08,"input_cache_write":1.4962500000000002e-07,"max_prompt_cost":0.366084096,"max_completion_cost":0.183042048,"max_cost":0.526245888},"sats_pricing":{"prompt":0.0004335393356618934,"completion":0.003468314685295147,"request":0.001,"image":0.0006193419080884193,"web_search":6.936629370590294,"internal_reasoning":0.004954735264707354,"input_cache_read":6.193419080884192e-05,"input_cache_write":0.00018580257242652574,"max_prompt_cost":454.5989424310055,"max_completion_cost":227.29947121550276,"max_cost":653.4859797445705},"per_request_limits":null,"top_provider":{"context_length":1048576,"max_completion_tokens":65536,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"google/gemini-2.5-pro","alias_ids":null,"forwarded_model_id":null},{"id":"o3-pro","name":"OpenAI: o3 Pro","created":1749598352,"description":"The o-series of models are trained with reinforcement learning to think before they answer and perform complex reasoning. The o3-pro model uses more compute to think harder and provide consistently...","context_length":200000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","file","image"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":8.379e-06,"completion":3.3516e-05,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":1.6758000000000002,"max_completion_cost":3.3516000000000004,"max_cost":4.189500000000001},"sats_pricing":{"prompt":0.010404944055885441,"completion":0.041619776223541766,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":2080.9888111770883,"max_completion_cost":4161.977622354177,"max_cost":5202.472027942721},"per_request_limits":null,"top_provider":{"context_length":200000,"max_completion_tokens":100000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/o3-pro-2025-06-10","alias_ids":null,"forwarded_model_id":null},{"id":"grok-3-mini","name":"xAI: Grok 3 Mini","created":1749583245,"description":"A lightweight model that thinks before responding. Fast, smart, and great for logic-based tasks that do not require deep domain knowledge. The raw thinking traces are accessible.","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Grok","instruct_type":null},"pricing":{"prompt":1.25685e-07,"completion":2.0947500000000004e-07,"request":0.0,"image":0.0,"web_search":0.0019950000000000002,"internal_reasoning":0.0,"input_cache_read":2.9925e-08,"input_cache_write":0.0,"max_prompt_cost":0.01647378432,"max_completion_cost":0.027456307200000005,"max_cost":0.027456307200000005},"sats_pricing":{"prompt":0.00015607416083828163,"completion":0.0002601236013971361,"request":0.001,"image":0.0,"web_search":2.4773676323536766,"internal_reasoning":0.0,"input_cache_read":3.7160514485305145e-05,"input_cache_write":0.0,"max_prompt_cost":20.45695240939525,"max_completion_cost":34.09492068232542,"max_cost":34.09492068232542},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"x-ai/grok-3-mini","alias_ids":null,"forwarded_model_id":null},{"id":"grok-3","name":"xAI: Grok 3","created":1749582908,"description":"Grok 3 is the latest model from xAI. It's their flagship model that excels at enterprise use cases like data extraction, coding, and text summarization. Possesses deep domain knowledge in...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Grok","instruct_type":null},"pricing":{"prompt":1.25685e-06,"completion":6.28425e-06,"request":0.0,"image":0.0,"web_search":0.0019950000000000002,"internal_reasoning":0.0,"input_cache_read":2.9925000000000004e-07,"input_cache_write":0.0,"max_prompt_cost":0.1647378432,"max_completion_cost":0.823689216,"max_cost":0.823689216},"sats_pricing":{"prompt":0.0015607416083828162,"completion":0.007803708041914081,"request":0.001,"image":0.0,"web_search":2.4773676323536766,"internal_reasoning":0.0,"input_cache_read":0.0003716051448530515,"input_cache_write":0.0,"max_prompt_cost":204.56952409395248,"max_completion_cost":1022.8476204697624,"max_cost":1022.8476204697624},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"x-ai/grok-3","alias_ids":null,"forwarded_model_id":null},{"id":"gemini-2.5-pro-preview","name":"Google: Gemini 2.5 Pro Preview 06-05","created":1749137257,"description":"Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy...","context_length":1048576,"architecture":{"modality":"text+image+file+audio->text","input_modalities":["file","image","text","audio"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":3.49125e-07,"completion":2.793e-06,"request":0.0,"image":4.987500000000001e-07,"web_search":0.005586000000000001,"internal_reasoning":3.990000000000001e-06,"input_cache_read":4.9875e-08,"input_cache_write":1.4962500000000002e-07,"max_prompt_cost":0.366084096,"max_completion_cost":0.183042048,"max_cost":0.526245888},"sats_pricing":{"prompt":0.0004335393356618934,"completion":0.003468314685295147,"request":0.001,"image":0.0006193419080884193,"web_search":6.936629370590294,"internal_reasoning":0.004954735264707354,"input_cache_read":6.193419080884192e-05,"input_cache_write":0.00018580257242652574,"max_prompt_cost":454.5989424310055,"max_completion_cost":227.29947121550276,"max_cost":653.4859797445705},"per_request_limits":null,"top_provider":{"context_length":1048576,"max_completion_tokens":65536,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"google/gemini-2.5-pro-preview-06-05","alias_ids":null,"forwarded_model_id":null},{"id":"deepseek-r1-0528","name":"DeepSeek R1 (Private via TEE)","created":1777854613,"description":"PPQ.AI model","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Unknown","instruct_type":null},"pricing":{"prompt":6.284250000000001e-07,"completion":2.1994875000000003e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.08236892160000002,"max_completion_cost":0.28829122560000003,"max_cost":0.28829122560000003},"sats_pricing":{"prompt":0.0007803708041914083,"completion":0.0027312978146699285,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":102.28476204697627,"max_completion_cost":357.99666716441686,"max_cost":357.99666716441686},"per_request_limits":null,"top_provider":null,"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":null,"alias_ids":null,"forwarded_model_id":null},{"id":"claude-opus-4","name":"Anthropic: Claude Opus 4","created":1747931245,"description":"Claude Opus 4 is benchmarked as the world’s best coding model, at time of release, bringing sustained performance on complex, long-running tasks and agent workflows. It sets new benchmarks in...","context_length":200000,"architecture":{"modality":"text+image+file->text","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":6.28425e-06,"completion":3.142125e-05,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":5.985000000000001e-07,"input_cache_write":7.48125e-06,"max_prompt_cost":1.25685,"max_completion_cost":1.00548,"max_cost":2.061234},"sats_pricing":{"prompt":0.007803708041914081,"completion":0.03901854020957041,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":0.000743210289706103,"input_cache_write":0.009290128621326285,"max_prompt_cost":1560.7416083828161,"max_completion_cost":1248.593286706253,"max_cost":2559.616237747818},"per_request_limits":null,"top_provider":{"context_length":200000,"max_completion_tokens":32000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"anthropic/claude-4-opus-20250522","alias_ids":null,"forwarded_model_id":null},{"id":"claude-sonnet-4","name":"Anthropic: Claude Sonnet 4","created":1747930371,"description":"Claude Sonnet 4 significantly enhances the capabilities of its predecessor, Sonnet 3.7, excelling in both coding and reasoning tasks with improved precision and controllability. Achieving state-of-the-art performance on SWE-bench (72.7%),...","context_length":1000000,"architecture":{"modality":"text+image+file->text","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":1.25685e-06,"completion":6.28425e-06,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":1.197e-07,"input_cache_write":1.49625e-06,"max_prompt_cost":1.25685,"max_completion_cost":0.402192,"max_cost":1.5786036},"sats_pricing":{"prompt":0.0015607416083828162,"completion":0.007803708041914081,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":0.00014864205794122058,"input_cache_write":0.0018580257242652575,"max_prompt_cost":1560.7416083828161,"max_completion_cost":499.43731468250115,"max_cost":1960.291460128817},"per_request_limits":null,"top_provider":{"context_length":1000000,"max_completion_tokens":64000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"anthropic/claude-4-sonnet-20250522","alias_ids":null,"forwarded_model_id":null},{"id":"gemma-3n-e4b-it","name":"Google: Gemma 3n 4B","created":1747776824,"description":"Gemma 3n E4B-it is optimized for efficient execution on mobile and low-resource devices, such as phones, laptops, and tablets. It supports multimodal inputs—including text, visual data, and audio—enabling diverse tasks...","context_length":32768,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":2.5137e-08,"completion":5.0274e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.000823689216,"max_completion_cost":0.001647378432,"max_cost":0.001647378432},"sats_pricing":{"prompt":3.121483216765632e-05,"completion":6.242966433531264e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":1.0228476204697623,"max_completion_cost":2.0456952409395246,"max_cost":2.0456952409395246},"per_request_limits":null,"top_provider":{"context_length":32768,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"google/gemma-3n-e4b-it","alias_ids":null,"forwarded_model_id":null},{"id":"mistral-medium-3","name":"Mistral: Mistral Medium 3","created":1746627341,"description":"Mistral Medium 3 is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at significantly reduced operational cost. It balances state-of-the-art reasoning and multimodal performance with 8× lower cost...","context_length":131072,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":1.6758000000000002e-07,"completion":8.379000000000002e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.5960000000000002e-08,"input_cache_write":0.0,"max_prompt_cost":0.021965045760000003,"max_completion_cost":0.10982522880000002,"max_cost":0.10982522880000002},"sats_pricing":{"prompt":0.00020809888111770884,"completion":0.0010404944055885444,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.9818941058829413e-05,"input_cache_write":0.0,"max_prompt_cost":27.275936545860333,"max_completion_cost":136.3796827293017,"max_cost":136.3796827293017},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"mistralai/mistral-medium-3","alias_ids":null,"forwarded_model_id":null},{"id":"gemini-2.5-pro-preview-05-06","name":"Google: Gemini 2.5 Pro Preview 05-06","created":1746578513,"description":"Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy...","context_length":1048576,"architecture":{"modality":"text+image+file+audio+video->text","input_modalities":["text","image","file","audio","video"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":3.49125e-07,"completion":2.793e-06,"request":0.0,"image":4.987500000000001e-07,"web_search":0.005586000000000001,"internal_reasoning":3.990000000000001e-06,"input_cache_read":4.9875e-08,"input_cache_write":1.4962500000000002e-07,"max_prompt_cost":0.366084096,"max_completion_cost":0.183039255,"max_cost":0.526243444125},"sats_pricing":{"prompt":0.0004335393356618934,"completion":0.003468314685295147,"request":0.001,"image":0.0006193419080884193,"web_search":6.936629370590294,"internal_reasoning":0.004954735264707354,"input_cache_read":6.193419080884192e-05,"input_cache_write":0.00018580257242652574,"max_prompt_cost":454.5989424310055,"max_completion_cost":227.29600290081748,"max_cost":653.4829449692209},"per_request_limits":null,"top_provider":{"context_length":1048576,"max_completion_tokens":65535,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"google/gemini-2.5-pro-preview-03-25","alias_ids":null,"forwarded_model_id":null},{"id":"spotlight","name":"Arcee AI: Spotlight","created":1746481552,"description":"Spotlight is a 7‑billion‑parameter vision‑language model derived from Qwen 2.5‑VL and fine‑tuned by Arcee AI for tight image‑text grounding tasks. It offers a 32 k‑token context window, enabling rich multimodal...","context_length":131072,"architecture":{"modality":"text+image->text","input_modalities":["image","text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":7.541100000000001e-08,"completion":7.541100000000001e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.009884270592000002,"max_completion_cost":0.004942210707000001,"max_cost":0.009884270592000002},"sats_pricing":{"prompt":9.364449650296899e-05,"completion":9.364449650296899e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":12.274171445637151,"max_completion_cost":6.1371793673150785,"max_cost":12.274171445637151},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":65537,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"arcee-ai/spotlight","alias_ids":null,"forwarded_model_id":null},{"id":"maestro-reasoning","name":"Arcee AI: Maestro Reasoning","created":1746481269,"description":"Maestro Reasoning is Arcee's flagship analysis model: a 32 B‑parameter derivative of Qwen 2.5‑32 B tuned with DPO and chain‑of‑thought RL for step‑by‑step logic. Compared to the earlier 7 B...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":3.77055e-07,"completion":1.382535e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.04942135296,"max_completion_cost":0.04424112,"max_cost":0.08159671296},"sats_pricing":{"prompt":0.00046822248251484483,"completion":0.0017168157692210979,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":61.37085722818574,"max_completion_cost":54.93810461507513,"max_cost":101.32584240278584},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":32000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"arcee-ai/maestro-reasoning","alias_ids":null,"forwarded_model_id":null},{"id":"virtuoso-large","name":"Arcee AI: Virtuoso Large","created":1746478885,"description":"Virtuoso‑Large is Arcee's top‑tier general‑purpose LLM at 72 B parameters, tuned to tackle cross‑domain reasoning, creative writing and enterprise QA. Unlike many 70 B peers, it retains the 128 k...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":3.142125e-07,"completion":5.0274e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0411844608,"max_completion_cost":0.03217536,"max_cost":0.0532502208},"sats_pricing":{"prompt":0.00039018540209570404,"completion":0.0006242966433531265,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":51.14238102348812,"max_completion_cost":39.9549851746001,"max_cost":66.12550046396315},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":64000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"arcee-ai/virtuoso-large","alias_ids":null,"forwarded_model_id":null},{"id":"coder-large","name":"Arcee AI: Coder Large","created":1746478663,"description":"Coder‑Large is a 32 B‑parameter offspring of Qwen 2.5‑Instruct that has been further trained on permissively‑licensed GitHub, CodeSearchNet and synthetic bug‑fix corpora. It supports a 32k context window, enabling multi‑file...","context_length":32768,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":2.0947500000000004e-07,"completion":3.3516000000000004e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.006864076800000001,"max_completion_cost":0.010982522880000001,"max_cost":0.010982522880000001},"sats_pricing":{"prompt":0.0002601236013971361,"completion":0.0004161977622354177,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":8.523730170581356,"max_completion_cost":13.637968272930166,"max_cost":13.637968272930166},"per_request_limits":null,"top_provider":{"context_length":32768,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"arcee-ai/coder-large","alias_ids":null,"forwarded_model_id":null},{"id":"llama-guard-4-12b","name":"Meta: Llama Guard 4 12B","created":1745975193,"description":"Llama Guard 4 is a Llama 4 Scout-derived multimodal pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM...","context_length":163840,"architecture":{"modality":"text+image->text","input_modalities":["image","text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":7.541100000000001e-08,"completion":7.541100000000001e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.012355338240000003,"max_completion_cost":0.0012355338240000002,"max_cost":0.012355338240000003},"sats_pricing":{"prompt":9.364449650296899e-05,"completion":9.364449650296899e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":15.342714307046439,"max_completion_cost":1.534271430704644,"max_cost":15.342714307046439},"per_request_limits":null,"top_provider":{"context_length":163840,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"meta-llama/llama-guard-4-12b","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3-30b-a3b","name":"Qwen: Qwen3 30B A3B","created":1745878604,"description":"Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique...","context_length":40960,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":"qwen3"},"pricing":{"prompt":3.7705500000000006e-08,"completion":1.885275e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0015444172800000004,"max_completion_cost":0.0037705499999999997,"max_cost":0.0045608572799999995},"sats_pricing":{"prompt":4.6822248251484494e-05,"completion":0.00023411124125742241,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":1.9178392883808049,"max_completion_cost":4.682224825148448,"max_cost":5.663619148499563},"per_request_limits":null,"top_provider":{"context_length":40960,"max_completion_tokens":20000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3-30b-a3b-04-28","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3-8b","name":"Qwen: Qwen3 8B","created":1745876632,"description":"Qwen3-8B is a dense 8.2B parameter causal language model from the Qwen3 series, designed for both reasoning-heavy tasks and efficient dialogue. It supports seamless switching between \"thinking\" mode for math,...","context_length":40960,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":"qwen3"},"pricing":{"prompt":2.0947500000000003e-08,"completion":1.6758000000000002e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.995e-08,"input_cache_write":0.0,"max_prompt_cost":0.0008580096000000001,"max_completion_cost":0.0013728153600000002,"max_cost":0.0020592230400000005},"sats_pricing":{"prompt":2.6012360139713605e-05,"completion":0.00020809888111770884,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":2.4773676323536764e-05,"input_cache_write":0.0,"max_prompt_cost":1.0654662713226692,"max_completion_cost":1.7047460341162708,"max_cost":2.5571190511744066},"per_request_limits":null,"top_provider":{"context_length":40960,"max_completion_tokens":8192,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3-8b-04-28","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3-14b","name":"Qwen: Qwen3 14B","created":1745876478,"description":"Qwen3-14B is a dense 14.8B parameter causal language model from the Qwen3 series, designed for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for...","context_length":40960,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":"qwen3"},"pricing":{"prompt":2.5137e-08,"completion":1.00548e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.00102961152,"max_completion_cost":0.00411844608,"max_cost":0.00411844608},"sats_pricing":{"prompt":3.121483216765632e-05,"completion":0.00012485932867062528,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":1.278559525587203,"max_completion_cost":5.114238102348812,"max_cost":5.114238102348812},"per_request_limits":null,"top_provider":{"context_length":40960,"max_completion_tokens":40960,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3-14b-04-28","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3-32b","name":"Qwen: Qwen3 32B","created":1745875945,"description":"Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for...","context_length":40960,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":"qwen3"},"pricing":{"prompt":3.351600000000001e-08,"completion":1.00548e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.5960000000000002e-08,"input_cache_write":0.0,"max_prompt_cost":0.0013728153600000002,"max_completion_cost":0.00411844608,"max_cost":0.00411844608},"sats_pricing":{"prompt":4.161977622354177e-05,"completion":0.00012485932867062528,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.9818941058829413e-05,"input_cache_write":0.0,"max_prompt_cost":1.7047460341162708,"max_completion_cost":5.114238102348812,"max_cost":5.114238102348812},"per_request_limits":null,"top_provider":{"context_length":40960,"max_completion_tokens":40960,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3-32b-04-28","alias_ids":null,"forwarded_model_id":null},{"id":"qwen3-235b-a22b","name":"Qwen: Qwen3 235B A22B","created":1745875757,"description":"Qwen3-235B-A22B is a 235B parameter mixture-of-experts (MoE) model developed by Qwen, activating 22B parameters per forward pass. It supports seamless switching between a \"thinking\" mode for complex reasoning, math, and...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":"qwen3"},"pricing":{"prompt":1.9062225e-07,"completion":7.62489e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.024985239552,"max_completion_cost":0.006246309888,"max_cost":0.029669971968000003},"sats_pricing":{"prompt":0.0002367124772713938,"completion":0.0009468499090855752,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":31.026377820916128,"max_completion_cost":7.756594455229032,"max_cost":36.843823662337904},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":8192,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen3-235b-a22b-04-28","alias_ids":null,"forwarded_model_id":null},{"id":"o4-mini-high","name":"OpenAI: o4 Mini High","created":1744824212,"description":"OpenAI o4-mini-high is the same model as [o4-mini](/openai/o4-mini) with reasoning_effort set to high. OpenAI o4-mini is a compact reasoning model in the o-series, optimized for fast, cost-efficient performance while retaining...","context_length":200000,"architecture":{"modality":"text+image+file->text","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":4.60845e-07,"completion":1.84338e-06,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":1.0972500000000001e-07,"input_cache_write":0.0,"max_prompt_cost":0.092169,"max_completion_cost":0.184338,"max_cost":0.2304225},"sats_pricing":{"prompt":0.0005722719230736992,"completion":0.002289087692294797,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":0.0001362552197794522,"input_cache_write":0.0,"max_prompt_cost":114.45438461473985,"max_completion_cost":228.9087692294797,"max_cost":286.1359615368496},"per_request_limits":null,"top_provider":{"context_length":200000,"max_completion_tokens":100000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/o4-mini-high-2025-04-16","alias_ids":null,"forwarded_model_id":null},{"id":"o3","name":"OpenAI: o3","created":1744823457,"description":"o3 is a well-rounded and powerful model across domains. It sets a new standard for math, science, coding, and visual reasoning tasks. It also excels at technical writing and instruction-following....","context_length":200000,"architecture":{"modality":"text+image+file->text","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":8.379000000000002e-07,"completion":3.3516000000000007e-06,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":1.995e-07,"input_cache_write":0.0,"max_prompt_cost":0.16758000000000003,"max_completion_cost":0.33516000000000007,"max_cost":0.4189500000000001},"sats_pricing":{"prompt":0.0010404944055885444,"completion":0.004161977622354178,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":0.0002477367632353677,"input_cache_write":0.0,"max_prompt_cost":208.09888111770886,"max_completion_cost":416.1977622354177,"max_cost":520.2472027942722},"per_request_limits":null,"top_provider":{"context_length":200000,"max_completion_tokens":100000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/o3-2025-04-16","alias_ids":null,"forwarded_model_id":null},{"id":"o4-mini","name":"OpenAI: o4 Mini","created":1744820942,"description":"OpenAI o4-mini is a compact reasoning model in the o-series, optimized for fast, cost-efficient performance while retaining strong multimodal and agentic capabilities. It supports tool use and demonstrates competitive reasoning...","context_length":200000,"architecture":{"modality":"text+image+file->text","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":4.60845e-07,"completion":1.84338e-06,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":1.0972500000000001e-07,"input_cache_write":0.0,"max_prompt_cost":0.092169,"max_completion_cost":0.184338,"max_cost":0.2304225},"sats_pricing":{"prompt":0.0005722719230736992,"completion":0.002289087692294797,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":0.0001362552197794522,"input_cache_write":0.0,"max_prompt_cost":114.45438461473985,"max_completion_cost":228.9087692294797,"max_cost":286.1359615368496},"per_request_limits":null,"top_provider":{"context_length":200000,"max_completion_tokens":100000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/o4-mini-2025-04-16","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-4.1","name":"OpenAI: GPT-4.1","created":1744651385,"description":"GPT-4.1 is a flagship large language model optimized for advanced instruction following, real-world software engineering, and long-context reasoning. It supports a 1 million token context window and outperforms GPT-4o and...","context_length":1047576,"architecture":{"modality":"text+image+file->text","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":8.379000000000002e-07,"completion":3.3516000000000007e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.995e-07,"input_cache_write":0.0,"max_prompt_cost":0.8777639304000002,"max_completion_cost":3.511055721600001,"max_cost":3.511055721600001},"sats_pricing":{"prompt":0.0010404944055885444,"completion":0.004161977622354178,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0002477367632353677,"input_cache_write":0.0,"max_prompt_cost":1089.996967428825,"max_completion_cost":4359.9878697153,"max_cost":4359.9878697153},"per_request_limits":null,"top_provider":{"context_length":1047576,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-4.1-2025-04-14","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-4.1-mini","name":"OpenAI: GPT-4.1 Mini","created":1744651381,"description":"GPT-4.1 Mini is a mid-sized model delivering performance competitive with GPT-4o at substantially lower latency and cost. It retains a 1 million token context window and scores 45.1% on hard...","context_length":1047576,"architecture":{"modality":"text+image+file->text","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":1.6758000000000002e-07,"completion":6.703200000000001e-07,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":3.99e-08,"input_cache_write":0.0,"max_prompt_cost":0.17555278608000002,"max_completion_cost":0.021965045760000003,"max_cost":0.1920265704},"sats_pricing":{"prompt":0.00020809888111770884,"completion":0.0008323955244708353,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":4.954735264707353e-05,"input_cache_write":0.0,"max_prompt_cost":217.99939348576495,"max_completion_cost":27.275936545860333,"max_cost":238.45634589516018},"per_request_limits":null,"top_provider":{"context_length":1047576,"max_completion_tokens":32768,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-4.1-mini-2025-04-14","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-4.1-nano","name":"OpenAI: GPT-4.1 Nano","created":1744651369,"description":"For tasks that demand low latency, GPT‑4.1 nano is the fastest and cheapest model in the GPT-4.1 series. It delivers exceptional performance at a small size with its 1 million...","context_length":1047576,"architecture":{"modality":"text+image+file->text","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":4.1895000000000006e-08,"completion":1.6758000000000002e-07,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":9.975e-09,"input_cache_write":0.0,"max_prompt_cost":0.043888196520000006,"max_completion_cost":0.005491261440000001,"max_cost":0.0480066426},"sats_pricing":{"prompt":5.202472027942721e-05,"completion":0.00020809888111770884,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":1.2386838161768382e-05,"input_cache_write":0.0,"max_prompt_cost":54.49984837144124,"max_completion_cost":6.818984136465083,"max_cost":59.614086473790046},"per_request_limits":null,"top_provider":{"context_length":1047576,"max_completion_tokens":32768,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-4.1-nano-2025-04-14","alias_ids":null,"forwarded_model_id":null},{"id":"codellama-7b-instruct-solidity","name":"AlfredPros: CodeLLaMa 7B Instruct Solidity","created":1744641874,"description":"A finetuned 7 billion parameters Code LLaMA - Instruct model to generate Solidity smart contract using 4-bit QLoRA finetuning provided by PEFT library.","context_length":4096,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":"alpaca"},"pricing":{"prompt":3.3516000000000004e-07,"completion":5.0274e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0013728153600000002,"max_completion_cost":0.00205922304,"max_cost":0.00205922304},"sats_pricing":{"prompt":0.0004161977622354177,"completion":0.0006242966433531265,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":1.7047460341162708,"max_completion_cost":2.557119051174406,"max_cost":2.557119051174406},"per_request_limits":null,"top_provider":{"context_length":4096,"max_completion_tokens":4096,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"alfredpros/codellama-7b-instruct-solidity","alias_ids":null,"forwarded_model_id":null},{"id":"grok-3-mini-beta","name":"xAI: Grok 3 Mini Beta","created":1744240195,"description":"Grok 3 Mini is a lightweight, smaller thinking model. Unlike traditional models that generate answers immediately, Grok 3 Mini thinks before responding. It’s ideal for reasoning-heavy tasks that don’t demand...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Grok","instruct_type":null},"pricing":{"prompt":1.25685e-07,"completion":2.0947500000000004e-07,"request":0.0,"image":0.0,"web_search":0.0019950000000000002,"internal_reasoning":0.0,"input_cache_read":2.9925e-08,"input_cache_write":0.0,"max_prompt_cost":0.01647378432,"max_completion_cost":0.027456307200000005,"max_cost":0.027456307200000005},"sats_pricing":{"prompt":0.00015607416083828163,"completion":0.0002601236013971361,"request":0.001,"image":0.0,"web_search":2.4773676323536766,"internal_reasoning":0.0,"input_cache_read":3.7160514485305145e-05,"input_cache_write":0.0,"max_prompt_cost":20.45695240939525,"max_completion_cost":34.09492068232542,"max_cost":34.09492068232542},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"x-ai/grok-3-mini-beta","alias_ids":null,"forwarded_model_id":null},{"id":"grok-3-beta","name":"xAI: Grok 3 Beta","created":1744240068,"description":"Grok 3 is the latest model from xAI. It's their flagship model that excels at enterprise use cases like data extraction, coding, and text summarization. Possesses deep domain knowledge in...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Grok","instruct_type":null},"pricing":{"prompt":1.25685e-06,"completion":6.28425e-06,"request":0.0,"image":0.0,"web_search":0.0019950000000000002,"internal_reasoning":0.0,"input_cache_read":2.9925000000000004e-07,"input_cache_write":0.0,"max_prompt_cost":0.1647378432,"max_completion_cost":0.823689216,"max_cost":0.823689216},"sats_pricing":{"prompt":0.0015607416083828162,"completion":0.007803708041914081,"request":0.001,"image":0.0,"web_search":2.4773676323536766,"internal_reasoning":0.0,"input_cache_read":0.0003716051448530515,"input_cache_write":0.0,"max_prompt_cost":204.56952409395248,"max_completion_cost":1022.8476204697624,"max_cost":1022.8476204697624},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"x-ai/grok-3-beta","alias_ids":null,"forwarded_model_id":null},{"id":"llama-4-maverick","name":"Meta: Llama 4 Maverick","created":1743881822,"description":"Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward...","context_length":1048576,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Llama4","instruct_type":null},"pricing":{"prompt":6.28425e-08,"completion":2.5137e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.06589513728,"max_completion_cost":0.00411844608,"max_cost":0.06898397184},"sats_pricing":{"prompt":7.803708041914081e-05,"completion":0.00031214832167656326,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":81.827809637581,"max_completion_cost":5.114238102348812,"max_cost":85.6634882143426},"per_request_limits":null,"top_provider":{"context_length":1048576,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"meta-llama/llama-4-maverick-17b-128e-instruct","alias_ids":null,"forwarded_model_id":null},{"id":"llama-4-scout","name":"Meta: Llama 4 Scout","created":1743881519,"description":"Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input...","context_length":327680,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Llama4","instruct_type":null},"pricing":{"prompt":3.351600000000001e-08,"completion":1.25685e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.010982522880000001,"max_completion_cost":0.00205922304,"max_cost":0.012492619776000002},"sats_pricing":{"prompt":4.161977622354177e-05,"completion":0.00015607416083828163,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":13.637968272930166,"max_completion_cost":2.557119051174406,"max_cost":15.513188910458066},"per_request_limits":null,"top_provider":{"context_length":327680,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"meta-llama/llama-4-scout-17b-16e-instruct","alias_ids":null,"forwarded_model_id":null},{"id":"deepseek-chat-v3-0324","name":"DeepSeek: DeepSeek V3 0324","created":1742824755,"description":"DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team. It succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well...","context_length":163840,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":null},"pricing":{"prompt":8.379000000000001e-08,"completion":3.225915e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":5.3865000000000006e-08,"input_cache_write":0.0,"max_prompt_cost":0.013728153600000001,"max_completion_cost":0.005285339136,"max_cost":0.017640677376000002},"sats_pricing":{"prompt":0.00010404944055885442,"completion":0.00040059034615158953,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":6.688892607354927e-05,"input_cache_write":0.0,"max_prompt_cost":17.047460341162708,"max_completion_cost":6.563272231347643,"max_cost":21.90598653839408},"per_request_limits":null,"top_provider":{"context_length":163840,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"deepseek/deepseek-chat-v3-0324","alias_ids":null,"forwarded_model_id":null},{"id":"o1-pro","name":"OpenAI: o1-pro","created":1742423211,"description":"The o1 series of models are trained with reinforcement learning to think before they answer and perform complex reasoning. The o1-pro model uses more compute to think harder and provide...","context_length":200000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":6.28425e-05,"completion":0.00025137,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":12.5685,"max_completion_cost":25.137,"max_cost":31.42125},"sats_pricing":{"prompt":0.07803708041914081,"completion":0.31214832167656326,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":15607.416083828162,"max_completion_cost":31214.832167656325,"max_cost":39018.540209570405},"per_request_limits":null,"top_provider":{"context_length":200000,"max_completion_tokens":100000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/o1-pro","alias_ids":null,"forwarded_model_id":null},{"id":"mistral-small-3.1-24b-instruct","name":"Mistral: Mistral Small 3.1 24B","created":1742238937,"description":"Mistral Small 3.1 24B Instruct is an upgraded variant of Mistral Small 3 (2501), featuring 24 billion parameters with advanced multimodal capabilities. It provides state-of-the-art performance in text-based reasoning and...","context_length":128000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":1.466325e-07,"completion":2.3461199999999998e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.01876896,"max_completion_cost":0.030030335999999998,"max_cost":0.030030335999999998},"sats_pricing":{"prompt":0.0001820865209779952,"completion":0.00029133843356479234,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":23.307074685183387,"max_completion_cost":37.29131949629342,"max_cost":37.29131949629342},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"mistralai/mistral-small-3.1-24b-instruct-2503","alias_ids":null,"forwarded_model_id":null},{"id":"gemma-3-4b-it","name":"Google: Gemma 3 4B","created":1741905510,"description":"Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities,...","context_length":131072,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":"gemma"},"pricing":{"prompt":1.6758000000000004e-08,"completion":3.351600000000001e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0021965045760000005,"max_completion_cost":0.0005491261440000001,"max_cost":0.0024710676480000004},"sats_pricing":{"prompt":2.0809888111770886e-05,"completion":4.161977622354177e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":2.7275936545860335,"max_completion_cost":0.6818984136465084,"max_cost":3.068542861409288},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"google/gemma-3-4b-it","alias_ids":null,"forwarded_model_id":null},{"id":"gemma-3-12b-it","name":"Google: Gemma 3 12B","created":1741902625,"description":"Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities,...","context_length":131072,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":"gemma"},"pricing":{"prompt":1.6758000000000004e-08,"completion":5.446350000000001e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0021965045760000005,"max_completion_cost":0.0008923299840000002,"max_cost":0.002814271488000001},"sats_pricing":{"prompt":2.0809888111770886e-05,"completion":6.763213636325538e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":2.7275936545860335,"max_completion_cost":1.1080849221755762,"max_cost":3.494729369938356},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"google/gemma-3-12b-it","alias_ids":null,"forwarded_model_id":null},{"id":"command-a","name":"Cohere: Command A","created":1741894342,"description":"Command A is an open-weights 111B parameter model with a 256k context window focused on delivering great performance across agentic, multilingual, and coding use cases. Compared to other leading proprietary...","context_length":256000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":1.047375e-06,"completion":4.1895e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.26812800000000003,"max_completion_cost":0.034320384,"max_cost":0.293868288},"sats_pricing":{"prompt":0.0013006180069856802,"completion":0.005202472027942721,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":332.95820978833416,"max_completion_cost":42.61865085290677,"max_cost":364.92219792801416},"per_request_limits":null,"top_provider":{"context_length":256000,"max_completion_tokens":8192,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"cohere/command-a-03-2025","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-4o-mini-search-preview","name":"OpenAI: GPT-4o-mini Search Preview","created":1741818122,"description":"GPT-4o mini Search Preview is a specialized model for web search in Chat Completions. It is trained to understand and execute web search queries.","context_length":128000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":6.28425e-08,"completion":2.5137e-07,"request":0.0,"image":0.0,"web_search":0.010972500000000001,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.00804384,"max_completion_cost":0.00411844608,"max_cost":0.01113267456},"sats_pricing":{"prompt":7.803708041914081e-05,"completion":0.00031214832167656326,"request":0.001,"image":0.0,"web_search":13.625521977945223,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":9.988746293650024,"max_completion_cost":5.114238102348812,"max_cost":13.824424870411631},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-4o-mini-search-preview-2025-03-11","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-4o-search-preview","name":"OpenAI: GPT-4o Search Preview","created":1741817949,"description":"GPT-4o Search Previewis a specialized model for web search in Chat Completions. It is trained to understand and execute web search queries.","context_length":128000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":1.047375e-06,"completion":4.1895e-06,"request":0.0,"image":0.0,"web_search":0.013965000000000002,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.13406400000000002,"max_completion_cost":0.068640768,"max_cost":0.18554457600000002},"sats_pricing":{"prompt":0.0013006180069856802,"completion":0.005202472027942721,"request":0.001,"image":0.0,"web_search":17.341573426475737,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":166.47910489416708,"max_completion_cost":85.23730170581354,"max_cost":230.40708117352722},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-4o-search-preview-2025-03-11","alias_ids":null,"forwarded_model_id":null},{"id":"reka-flash-3","name":"Reka Flash 3","created":1741812813,"description":"Reka Flash 3 is a general-purpose, instruction-tuned large language model with 21 billion parameters, developed by Reka. It excels at general chat, coding tasks, instruction-following, and function calling. Featuring a...","context_length":65536,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":4.1895000000000006e-08,"completion":8.379000000000001e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0027456307200000004,"max_completion_cost":0.005491261440000001,"max_cost":0.005491261440000001},"sats_pricing":{"prompt":5.202472027942721e-05,"completion":0.00010404944055885442,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":3.4094920682325416,"max_completion_cost":6.818984136465083,"max_cost":6.818984136465083},"per_request_limits":null,"top_provider":{"context_length":65536,"max_completion_tokens":65536,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"rekaai/reka-flash-3","alias_ids":null,"forwarded_model_id":null},{"id":"gemma-3-27b-it","name":"Google: Gemma 3 27B","created":1741756359,"description":"Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities,...","context_length":131072,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":"gemma"},"pricing":{"prompt":3.351600000000001e-08,"completion":6.703200000000001e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.004393009152000001,"max_completion_cost":0.0010982522880000002,"max_cost":0.004942135296000001},"sats_pricing":{"prompt":4.161977622354177e-05,"completion":8.323955244708354e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":5.455187309172067,"max_completion_cost":1.3637968272930168,"max_cost":6.137085722818576},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"google/gemma-3-27b-it","alias_ids":null,"forwarded_model_id":null},{"id":"skyfall-36b-v2","name":"TheDrummer: Skyfall 36B V2","created":1741636566,"description":"Skyfall 36B v2 is an enhanced iteration of Mistral Small 2501, specifically fine-tuned for improved creativity, nuanced writing, role-playing, and coherent storytelling.","context_length":32768,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":2.304225e-07,"completion":3.3516000000000004e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":9.975e-08,"input_cache_write":0.0,"max_prompt_cost":0.00755048448,"max_completion_cost":0.010982522880000001,"max_cost":0.010982522880000001},"sats_pricing":{"prompt":0.0002861359615368496,"completion":0.0004161977622354177,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.00012386838161768384,"input_cache_write":0.0,"max_prompt_cost":9.376103187639488,"max_completion_cost":13.637968272930166,"max_cost":13.637968272930166},"per_request_limits":null,"top_provider":{"context_length":32768,"max_completion_tokens":32768,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"thedrummer/skyfall-36b-v2","alias_ids":null,"forwarded_model_id":null},{"id":"sonar-reasoning-pro","name":"Perplexity: Sonar Reasoning Pro","created":1741313308,"description":"Note: Sonar Pro pricing includes Perplexity search pricing. See [details here](https://docs.perplexity.ai/guides/pricing#detailed-pricing-breakdown-for-sonar-reasoning-pro-and-sonar-pro) Sonar Reasoning Pro is a premier reasoning model powered by DeepSeek R1 with Chain of Thought (CoT). Designed for...","context_length":128000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":"deepseek-r1"},"pricing":{"prompt":8.379000000000002e-07,"completion":3.3516000000000007e-06,"request":0.0,"image":0.0,"web_search":0.0019950000000000002,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.10725120000000002,"max_completion_cost":0.4290048000000001,"max_cost":0.4290048000000001},"sats_pricing":{"prompt":0.0010404944055885444,"completion":0.004161977622354178,"request":0.001,"image":0.0,"web_search":2.4773676323536766,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":133.18328391533367,"max_completion_cost":532.7331356613347,"max_cost":532.7331356613347},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"perplexity/sonar-reasoning-pro","alias_ids":null,"forwarded_model_id":null},{"id":"sonar-pro","name":"Perplexity: Sonar Pro","created":1741312423,"description":"Note: Sonar Pro pricing includes Perplexity search pricing. See [details here](https://docs.perplexity.ai/guides/pricing#detailed-pricing-breakdown-for-sonar-reasoning-pro-and-sonar-pro) For enterprises seeking more advanced capabilities, the Sonar Pro API can handle in-depth, multi-step queries with added extensibility, like...","context_length":200000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":1.25685e-06,"completion":6.28425e-06,"request":0.0,"image":0.0,"web_search":0.0019950000000000002,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.25137000000000004,"max_completion_cost":0.050274,"max_cost":0.2915892},"sats_pricing":{"prompt":0.0015607416083828162,"completion":0.007803708041914081,"request":0.001,"image":0.0,"web_search":2.4773676323536766,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":312.1483216765633,"max_completion_cost":62.429664335312644,"max_cost":362.09205314481335},"per_request_limits":null,"top_provider":{"context_length":200000,"max_completion_tokens":8000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"perplexity/sonar-pro","alias_ids":null,"forwarded_model_id":null},{"id":"sonar-deep-research","name":"Perplexity: Sonar Deep Research","created":1741311246,"description":"Sonar Deep Research is a research-focused model designed for multi-step retrieval, synthesis, and reasoning across complex topics. It autonomously searches, reads, and evaluates sources, refining its approach as it gathers...","context_length":128000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":"deepseek-r1"},"pricing":{"prompt":8.379000000000002e-07,"completion":3.3516000000000007e-06,"request":0.0,"image":0.0,"web_search":0.0019950000000000002,"internal_reasoning":1.1970000000000001e-06,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.10725120000000002,"max_completion_cost":0.4290048000000001,"max_cost":0.4290048000000001},"sats_pricing":{"prompt":0.0010404944055885444,"completion":0.004161977622354178,"request":0.001,"image":0.0,"web_search":2.4773676323536766,"internal_reasoning":0.001486420579412206,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":133.18328391533367,"max_completion_cost":532.7331356613347,"max_cost":532.7331356613347},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"perplexity/sonar-deep-research","alias_ids":null,"forwarded_model_id":null},{"id":"gemini-2.0-flash-lite-001","name":"Google: Gemini 2.0 Flash Lite","created":1740506212,"description":"Gemini 2.0 Flash Lite offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro 1.5](/google/gemini-pro-1.5),...","context_length":1048576,"architecture":{"modality":"text+image+file+audio+video->text","input_modalities":["text","image","file","audio","video"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":2.0947500000000003e-08,"completion":8.379000000000001e-08,"request":0.0,"image":2.9925e-08,"web_search":0.005586000000000001,"internal_reasoning":1.197e-07,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.021965045760000003,"max_completion_cost":0.0006864076800000001,"max_cost":0.022479851520000003},"sats_pricing":{"prompt":2.6012360139713605e-05,"completion":0.00010404944055885442,"request":0.001,"image":3.7160514485305145e-05,"web_search":6.936629370590294,"internal_reasoning":0.00014864205794122058,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":27.275936545860333,"max_completion_cost":0.8523730170581354,"max_cost":27.915216308653935},"per_request_limits":null,"top_provider":{"context_length":1048576,"max_completion_tokens":8192,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"google/gemini-2.0-flash-lite-001","alias_ids":null,"forwarded_model_id":null},{"id":"claude-3.7-sonnet","name":"Anthropic: Claude 3.7 Sonnet","created":1740422110,"description":"Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and...","context_length":200000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":1.25685e-06,"completion":6.28425e-06,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":1.197e-07,"input_cache_write":1.49625e-06,"max_prompt_cost":0.25137000000000004,"max_completion_cost":0.804384,"max_cost":0.8948772},"sats_pricing":{"prompt":0.0015607416083828162,"completion":0.007803708041914081,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":0.00014864205794122058,"input_cache_write":0.0018580257242652575,"max_prompt_cost":312.1483216765633,"max_completion_cost":998.8746293650023,"max_cost":1111.2480251685652},"per_request_limits":null,"top_provider":{"context_length":200000,"max_completion_tokens":128000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"anthropic/claude-3-7-sonnet-20250219","alias_ids":null,"forwarded_model_id":null},{"id":"claude-3.7-sonnet:thinking","name":"Anthropic: Claude 3.7 Sonnet (thinking)","created":1740422110,"description":"Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and...","context_length":200000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":1.25685e-06,"completion":6.28425e-06,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":1.197e-07,"input_cache_write":1.49625e-06,"max_prompt_cost":0.25137000000000004,"max_completion_cost":0.402192,"max_cost":0.5731236},"sats_pricing":{"prompt":0.0015607416083828162,"completion":0.007803708041914081,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":0.00014864205794122058,"input_cache_write":0.0018580257242652575,"max_prompt_cost":312.1483216765633,"max_completion_cost":499.43731468250115,"max_cost":711.6981734225641},"per_request_limits":null,"top_provider":{"context_length":200000,"max_completion_tokens":64000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"anthropic/claude-3-7-sonnet-20250219","alias_ids":null,"forwarded_model_id":null},{"id":"mistral-saba","name":"Mistral: Saba","created":1739803239,"description":"Mistral Saba is a 24B-parameter language model specifically designed for the Middle East and South Asia, delivering accurate and contextually relevant responses while maintaining efficient performance. Trained on curated regional...","context_length":32768,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":8.379000000000001e-08,"completion":2.5137e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":7.980000000000001e-09,"input_cache_write":0.0,"max_prompt_cost":0.0027456307200000004,"max_completion_cost":0.00823689216,"max_cost":0.00823689216},"sats_pricing":{"prompt":0.00010404944055885442,"completion":0.00031214832167656326,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":9.909470529414706e-06,"input_cache_write":0.0,"max_prompt_cost":3.4094920682325416,"max_completion_cost":10.228476204697625,"max_cost":10.228476204697625},"per_request_limits":null,"top_provider":{"context_length":32768,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"mistralai/mistral-saba-2502","alias_ids":null,"forwarded_model_id":null},{"id":"llama-guard-3-8b","name":"Llama Guard 3 8B","created":1739401318,"description":"Llama Guard 3 is a Llama-3.1-8B pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification)...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":"none"},"pricing":{"prompt":2.01096e-07,"completion":1.25685e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.026358054912,"max_completion_cost":0.001647378432,"max_cost":0.026358054912},"sats_pricing":{"prompt":0.00024971865734125056,"completion":1.560741608382816e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":32.73112385503239,"max_completion_cost":2.0456952409395246,"max_cost":32.73112385503239},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"meta-llama/llama-guard-3-8b","alias_ids":null,"forwarded_model_id":null},{"id":"o3-mini-high","name":"OpenAI: o3 Mini High","created":1739372611,"description":"OpenAI o3-mini-high is the same model as [o3-mini](/openai/o3-mini) with reasoning_effort set to high. o3-mini is a cost-efficient language model optimized for STEM reasoning tasks, particularly excelling in science, mathematics, and...","context_length":200000,"architecture":{"modality":"text+file->text","input_modalities":["text","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":4.60845e-07,"completion":1.84338e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":2.1945000000000002e-07,"input_cache_write":0.0,"max_prompt_cost":0.092169,"max_completion_cost":0.184338,"max_cost":0.2304225},"sats_pricing":{"prompt":0.0005722719230736992,"completion":0.002289087692294797,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0002725104395589044,"input_cache_write":0.0,"max_prompt_cost":114.45438461473985,"max_completion_cost":228.9087692294797,"max_cost":286.1359615368496},"per_request_limits":null,"top_provider":{"context_length":200000,"max_completion_tokens":100000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/o3-mini-high-2025-01-31","alias_ids":null,"forwarded_model_id":null},{"id":"gemini-2.0-flash-001","name":"Google: Gemini 2.0 Flash","created":1738769413,"description":"Gemini Flash 2.0 offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro 1.5](/google/gemini-pro-1.5). It...","context_length":1000000,"architecture":{"modality":"text+image+file+audio+video->text","input_modalities":["text","image","file","audio","video"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":2.7930000000000003e-08,"completion":1.1172000000000001e-07,"request":0.0,"image":3.99e-08,"web_search":0.005586000000000001,"internal_reasoning":1.596e-07,"input_cache_read":9.975e-09,"input_cache_write":3.325e-08,"max_prompt_cost":0.029286727680000003,"max_completion_cost":0.0009152102400000001,"max_cost":0.02997313536},"sats_pricing":{"prompt":3.468314685295147e-05,"completion":0.00013873258741180588,"request":0.001,"image":4.954735264707353e-05,"web_search":6.936629370590294,"internal_reasoning":0.00019818941058829411,"input_cache_read":1.2386838161768382e-05,"input_cache_write":4.1289460539227946e-05,"max_prompt_cost":36.36791539448044,"max_completion_cost":1.1364973560775138,"max_cost":37.220288411538576},"per_request_limits":null,"top_provider":{"context_length":1048576,"max_completion_tokens":8192,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"google/gemini-2.0-flash-001","alias_ids":null,"forwarded_model_id":null},{"id":"qwen-vl-plus","name":"Qwen: Qwen VL Plus","created":1738731255,"description":"Qwen's Enhanced Large Visual Language Model. Significantly upgraded for detailed recognition capabilities and text recognition abilities, supporting ultra-high pixel resolutions up to millions of pixels and extreme aspect ratios for...","context_length":131072,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":null},"pricing":{"prompt":5.7186675000000013e-08,"completion":1.7156002500000002e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.08927e-08,"input_cache_write":0.0,"max_prompt_cost":0.007495571865600002,"max_completion_cost":0.0014054197248000002,"max_cost":0.008432518348800003},"sats_pricing":{"prompt":7.101374318141815e-05,"completion":0.00021304122954425442,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.3526427272651075e-05,"input_cache_write":0.0,"max_prompt_cost":9.30791334627484,"max_completion_cost":1.7452337524265322,"max_cost":10.471402514559196},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":8192,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen-vl-plus","alias_ids":null,"forwarded_model_id":null},{"id":"aion-1.0","name":"AionLabs: Aion-1.0","created":1738697557,"description":"Aion-1.0 is a multi-model system designed for high performance across various tasks, including reasoning and coding. It is built on DeepSeek-R1, augmented with additional models and techniques such as Tree...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":1.6758000000000003e-06,"completion":3.3516000000000007e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.21965045760000004,"max_completion_cost":0.10982522880000002,"max_cost":0.2745630720000001},"sats_pricing":{"prompt":0.002080988811177089,"completion":0.004161977622354178,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":272.7593654586034,"max_completion_cost":136.3796827293017,"max_cost":340.9492068232542},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":32768,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"aion-labs/aion-1.0","alias_ids":null,"forwarded_model_id":null},{"id":"aion-1.0-mini","name":"AionLabs: Aion-1.0-Mini","created":1738697107,"description":"Aion-1.0-Mini 32B parameter model is a distilled version of the DeepSeek-R1 model, designed for strong performance in reasoning domains such as mathematics, coding, and logic. It is a modified variant...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":2.93265e-07,"completion":5.8653e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.03843883008,"max_completion_cost":0.01921941504,"max_cost":0.048048537599999996},"sats_pricing":{"prompt":0.0003641730419559904,"completion":0.0007283460839119808,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":47.732888955255575,"max_completion_cost":23.866444477627788,"max_cost":59.666111194069465},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":32768,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"aion-labs/aion-1.0-mini","alias_ids":null,"forwarded_model_id":null},{"id":"aion-rp-llama-3.1-8b","name":"AionLabs: Aion-RP 1.0 (8B)","created":1738696718,"description":"Aion-RP-Llama-3.1-8B ranks the highest in the character evaluation portion of the RPBench-Auto benchmark, a roleplaying-specific variant of Arena-Hard-Auto, where LLMs evaluate each other’s responses. It is a fine-tuned base model...","context_length":32768,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":3.3516000000000004e-07,"completion":6.703200000000001e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.010982522880000001,"max_completion_cost":0.021965045760000003,"max_cost":0.021965045760000003},"sats_pricing":{"prompt":0.0004161977622354177,"completion":0.0008323955244708353,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":13.637968272930166,"max_completion_cost":27.275936545860333,"max_cost":27.275936545860333},"per_request_limits":null,"top_provider":{"context_length":32768,"max_completion_tokens":32768,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"aion-labs/aion-rp-llama-3.1-8b","alias_ids":null,"forwarded_model_id":null},{"id":"qwen-vl-max","name":"Qwen: Qwen VL Max","created":1738434304,"description":"Qwen VL Max is a visual understanding model with 7500 tokens context length. It excels in delivering optimal performance for a broader spectrum of complex tasks.\n","context_length":131072,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":null},"pricing":{"prompt":2.1785400000000004e-07,"completion":8.714160000000002e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.028554559488000005,"max_completion_cost":0.028554559488000005,"max_cost":0.04997047910400001},"sats_pricing":{"prompt":0.00027052854545302153,"completion":0.0010821141818120861,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":35.45871750961844,"max_completion_cost":35.45871750961844,"max_cost":62.05275564183226},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":32768,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen-vl-max-2025-01-25","alias_ids":null,"forwarded_model_id":null},{"id":"qwen-turbo","name":"Qwen: Qwen-Turbo","created":1738410974,"description":"Qwen-Turbo, based on Qwen2.5, is a 1M context model that provides fast speed and low cost, suitable for simple tasks.","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":null},"pricing":{"prompt":1.3615875000000002e-08,"completion":5.446350000000001e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":2.5935e-09,"input_cache_write":0.0,"max_prompt_cost":0.0017846599680000003,"max_completion_cost":0.0004461649920000001,"max_cost":0.002119283712},"sats_pricing":{"prompt":1.6908034090813846e-05,"completion":6.763213636325538e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":3.2205779220597797e-06,"input_cache_write":0.0,"max_prompt_cost":2.2161698443511524,"max_completion_cost":0.5540424610877881,"max_cost":2.631701690166993},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":8192,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen-turbo-2024-11-01","alias_ids":null,"forwarded_model_id":null},{"id":"qwen2.5-vl-72b-instruct","name":"Qwen: Qwen2.5 VL 72B Instruct","created":1738410311,"description":"Qwen2.5-VL is proficient in recognizing common objects such as flowers, birds, fish, and insects. It is also highly capable of analyzing texts, charts, icons, graphics, and layouts within images.","context_length":32000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":null},"pricing":{"prompt":1.0473750000000002e-07,"completion":3.142125e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0033516000000000006,"max_completion_cost":0.010054800000000001,"max_cost":0.010054800000000001},"sats_pricing":{"prompt":0.00013006180069856805,"completion":0.00039018540209570404,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":4.161977622354177,"max_completion_cost":12.48593286706253,"max_cost":12.48593286706253},"per_request_limits":null,"top_provider":{"context_length":32000,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen2.5-vl-72b-instruct","alias_ids":null,"forwarded_model_id":null},{"id":"qwen-plus","name":"Qwen: Qwen-Plus","created":1738409840,"description":"Qwen-Plus, based on the Qwen2.5 foundation model, is a 131K context model with a balanced performance, speed, and cost combination.","context_length":1000000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":null},"pricing":{"prompt":1.0892700000000002e-07,"completion":3.26781e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":2.0748e-08,"input_cache_write":1.29675e-07,"max_prompt_cost":0.10892700000000002,"max_completion_cost":0.010707959808,"max_cost":0.11606563987200003},"sats_pricing":{"prompt":0.00013526427272651077,"completion":0.0004057928181795322,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":2.5764623376478237e-05,"input_cache_write":0.00016102889610298898,"max_prompt_cost":135.26427272651077,"max_completion_cost":13.29701906610691,"max_cost":144.12895210391537},"per_request_limits":null,"top_provider":{"context_length":1000000,"max_completion_tokens":32768,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen-plus-2025-01-25","alias_ids":null,"forwarded_model_id":null},{"id":"qwen-max","name":"Qwen: Qwen-Max ","created":1738402289,"description":"Qwen-Max, based on Qwen2.5, provides the best inference performance among [Qwen models](/qwen), especially for complex multi-step tasks. It's a large-scale MoE model that has been pretrained on over 20 trillion...","context_length":32768,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":null},"pricing":{"prompt":4.357080000000001e-07,"completion":1.7428320000000003e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":8.2992e-08,"input_cache_write":0.0,"max_prompt_cost":0.014277279744000003,"max_completion_cost":0.014277279744000003,"max_cost":0.024985239552000004},"sats_pricing":{"prompt":0.0005410570909060431,"completion":0.0021642283636241723,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.00010305849350591295,"input_cache_write":0.0,"max_prompt_cost":17.72935875480922,"max_completion_cost":17.72935875480922,"max_cost":31.02637782091613},"per_request_limits":null,"top_provider":{"context_length":32768,"max_completion_tokens":8192,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen-max-2025-01-25","alias_ids":null,"forwarded_model_id":null},{"id":"o3-mini","name":"OpenAI: o3 Mini","created":1738351721,"description":"OpenAI o3-mini is a cost-efficient language model optimized for STEM reasoning tasks, particularly excelling in science, mathematics, and coding. This model supports the `reasoning_effort` parameter, which can be set to...","context_length":200000,"architecture":{"modality":"text+file->text","input_modalities":["text","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":4.60845e-07,"completion":1.84338e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":2.1945000000000002e-07,"input_cache_write":0.0,"max_prompt_cost":0.092169,"max_completion_cost":0.184338,"max_cost":0.2304225},"sats_pricing":{"prompt":0.0005722719230736992,"completion":0.002289087692294797,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0002725104395589044,"input_cache_write":0.0,"max_prompt_cost":114.45438461473985,"max_completion_cost":228.9087692294797,"max_cost":286.1359615368496},"per_request_limits":null,"top_provider":{"context_length":200000,"max_completion_tokens":100000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/o3-mini-2025-01-31","alias_ids":null,"forwarded_model_id":null},{"id":"mistral-small-24b-instruct-2501","name":"Mistral: Mistral Small 3","created":1738255409,"description":"Mistral Small 3 is a 24B-parameter language model optimized for low-latency performance across common AI tasks. Released under the Apache 2.0 license, it features both pre-trained and instruction-tuned versions designed...","context_length":32768,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":2.0947500000000003e-08,"completion":3.351600000000001e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0006864076800000001,"max_completion_cost":0.0005491261440000001,"max_cost":0.0008923299840000002},"sats_pricing":{"prompt":2.6012360139713605e-05,"completion":4.161977622354177e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.8523730170581354,"max_completion_cost":0.6818984136465084,"max_cost":1.1080849221755762},"per_request_limits":null,"top_provider":{"context_length":32768,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"mistralai/mistral-small-24b-instruct-2501","alias_ids":null,"forwarded_model_id":null},{"id":"deepseek-r1-distill-qwen-32b","name":"DeepSeek: R1 Distill Qwen 32B","created":1738194830,"description":"DeepSeek R1 Distill Qwen 32B is a distilled large language model based on [Qwen 2.5 32B](https://huggingface.co/Qwen/Qwen2.5-32B), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). It outperforms OpenAI's o1-mini across various benchmarks, achieving new...","context_length":32768,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":"deepseek-r1"},"pricing":{"prompt":1.214955e-07,"completion":1.214955e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.003981164544,"max_completion_cost":0.003981164544,"max_cost":0.003981164544},"sats_pricing":{"prompt":0.0001508716888103389,"completion":0.0001508716888103389,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":4.9437634989371855,"max_completion_cost":4.9437634989371855,"max_cost":4.9437634989371855},"per_request_limits":null,"top_provider":{"context_length":32768,"max_completion_tokens":32768,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"deepseek/deepseek-r1-distill-qwen-32b","alias_ids":null,"forwarded_model_id":null},{"id":"sonar","name":"Perplexity: Sonar","created":1738013808,"description":"Sonar is lightweight, affordable, fast, and simple to use — now featuring citations and the ability to customize sources. It is designed for companies seeking to integrate lightweight question-and-answer features...","context_length":127072,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":4.189500000000001e-07,"completion":4.189500000000001e-07,"request":0.0,"image":0.0,"web_search":0.0019950000000000002,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.05323681440000001,"max_completion_cost":0.05323681440000001,"max_cost":0.05323681440000001},"sats_pricing":{"prompt":0.0005202472027942722,"completion":0.0005202472027942722,"request":0.001,"image":0.0,"web_search":2.4773676323536766,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":66.10885255347375,"max_completion_cost":66.10885255347375,"max_cost":66.10885255347375},"per_request_limits":null,"top_provider":{"context_length":127072,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"perplexity/sonar","alias_ids":null,"forwarded_model_id":null},{"id":"deepseek-r1-distill-llama-70b","name":"DeepSeek: R1 Distill Llama 70B","created":1737663169,"description":"DeepSeek R1 Distill Llama 70B is a distilled large language model based on [Llama-3.3-70B-Instruct](/meta-llama/llama-3.3-70b-instruct), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). The model combines advanced distillation techniques to achieve high performance across...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":"deepseek-r1"},"pricing":{"prompt":2.93265e-07,"completion":3.3516000000000004e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.03843883008,"max_completion_cost":0.005491261440000001,"max_cost":0.03912523776},"sats_pricing":{"prompt":0.0003641730419559904,"completion":0.0004161977622354177,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":47.732888955255575,"max_completion_cost":6.818984136465083,"max_cost":48.58526197231372},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"deepseek/deepseek-r1-distill-llama-70b","alias_ids":null,"forwarded_model_id":null},{"id":"deepseek-r1","name":"DeepSeek: R1","created":1737381095,"description":"DeepSeek R1 is here: Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass....","context_length":64000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":"deepseek-r1"},"pricing":{"prompt":2.93265e-07,"completion":1.047375e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.01876896,"max_completion_cost":0.016758000000000002,"max_cost":0.030834720000000003},"sats_pricing":{"prompt":0.0003641730419559904,"completion":0.0013006180069856802,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":23.307074685183387,"max_completion_cost":20.809888111770885,"max_cost":38.290194125658424},"per_request_limits":null,"top_provider":{"context_length":64000,"max_completion_tokens":16000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"deepseek/deepseek-r1","alias_ids":null,"forwarded_model_id":null},{"id":"minimax-01","name":"MiniMax: MiniMax-01","created":1736915462,"description":"MiniMax-01 is a combines MiniMax-Text-01 for text generation and MiniMax-VL-01 for image understanding. It has 456 billion parameters, with 45.9 billion parameters activated per inference, and can handle a context...","context_length":1000192,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":8.379000000000001e-08,"completion":4.60845e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.08380608768000002,"max_completion_cost":0.46093348224,"max_cost":0.46093348224},"sats_pricing":{"prompt":0.00010404944055885442,"completion":0.0005722719230736992,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":104.06941805144173,"max_completion_cost":572.3817992829295,"max_cost":572.3817992829295},"per_request_limits":null,"top_provider":{"context_length":1000192,"max_completion_tokens":1000192,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"minimax/minimax-01","alias_ids":null,"forwarded_model_id":null},{"id":"phi-4","name":"Microsoft: Phi 4","created":1736489872,"description":"[Microsoft Research](/microsoft) Phi-4 is designed to perform well in complex reasoning tasks and can operate efficiently in situations with limited memory or where quick responses are needed. At 14 billion...","context_length":16384,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":2.7231750000000005e-08,"completion":5.8652999999999996e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0004461649920000001,"max_completion_cost":0.0009609707519999999,"max_cost":0.0009609707519999999},"sats_pricing":{"prompt":3.381606818162769e-05,"completion":7.283460839119808e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.5540424610877881,"max_completion_cost":1.1933222238813894,"max_cost":1.1933222238813894},"per_request_limits":null,"top_provider":{"context_length":16384,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"microsoft/phi-4","alias_ids":null,"forwarded_model_id":null},{"id":"l3.1-70b-hanami-x1","name":"Sao10K: Llama 3.1 70B Hanami x1","created":1736302854,"description":"This is [Sao10K](/sao10k)'s experiment over [Euryale v2.2](/sao10k/l3.1-euryale-70b).","context_length":16000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":null},"pricing":{"prompt":1.25685e-06,"completion":1.25685e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.020109600000000002,"max_completion_cost":0.020109600000000002,"max_cost":0.020109600000000002},"sats_pricing":{"prompt":0.0015607416083828162,"completion":0.0015607416083828162,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":24.97186573412506,"max_completion_cost":24.97186573412506,"max_cost":24.97186573412506},"per_request_limits":null,"top_provider":{"context_length":16000,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"sao10k/l3.1-70b-hanami-x1","alias_ids":null,"forwarded_model_id":null},{"id":"deepseek-chat","name":"DeepSeek: DeepSeek V3","created":1735241320,"description":"DeepSeek-V3 is the latest model from the DeepSeek team, building upon the instruction following and coding abilities of the previous versions. Pre-trained on nearly 15 trillion tokens, the reported evaluations...","context_length":163840,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":null},"pricing":{"prompt":1.3406400000000003e-07,"completion":3.728655e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.021965045760000003,"max_completion_cost":0.006109028352,"max_cost":0.025877569536000004},"sats_pricing":{"prompt":0.00016647910489416709,"completion":0.0004630200104869021,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":27.275936545860333,"max_completion_cost":7.586119851817404,"max_cost":32.13446274309171},"per_request_limits":null,"top_provider":{"context_length":163840,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"deepseek/deepseek-chat-v3","alias_ids":null,"forwarded_model_id":null},{"id":"l3.3-euryale-70b","name":"Sao10K: Llama 3.3 Euryale 70B","created":1734535928,"description":"Euryale L3.3 70B is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.2](/models/sao10k/l3-euryale-70b).","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":"llama3"},"pricing":{"prompt":2.7231750000000003e-07,"completion":3.142125e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.035693199360000004,"max_completion_cost":0.0051480576,"max_cost":0.036379607040000005},"sats_pricing":{"prompt":0.0003381606818162769,"completion":0.00039018540209570404,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":44.323396887023044,"max_completion_cost":6.392797627936015,"max_cost":45.17576990408118},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"sao10k/l3.3-euryale-70b-v2.3","alias_ids":null,"forwarded_model_id":null},{"id":"o1","name":"OpenAI: o1","created":1734459999,"description":"The latest and strongest model family from OpenAI, o1 is designed to spend more time thinking before responding. The o1 model series is trained with large-scale reinforcement learning to reason...","context_length":200000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":6.28425e-06,"completion":2.5137e-05,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":2.9925e-06,"input_cache_write":0.0,"max_prompt_cost":1.25685,"max_completion_cost":2.5137,"max_cost":3.142125},"sats_pricing":{"prompt":0.007803708041914081,"completion":0.031214832167656324,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.003716051448530515,"input_cache_write":0.0,"max_prompt_cost":1560.7416083828161,"max_completion_cost":3121.4832167656323,"max_cost":3901.8540209570406},"per_request_limits":null,"top_provider":{"context_length":200000,"max_completion_tokens":100000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/o1-2024-12-17","alias_ids":null,"forwarded_model_id":null},{"id":"command-r7b-12-2024","name":"Cohere: Command R7B (12-2024)","created":1734158152,"description":"Command R7B (12-2024) is a small, fast update of the Command R+ model, delivered in December 2024. It excels at RAG, tool use, agents, and similar tasks requiring complex reasoning...","context_length":128000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Cohere","instruct_type":null},"pricing":{"prompt":1.5710625e-08,"completion":6.28425e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.00201096,"max_completion_cost":0.00025137,"max_cost":0.0021994875},"sats_pricing":{"prompt":1.9509270104785203e-05,"completion":7.803708041914081e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":2.497186573412506,"max_completion_cost":0.31214832167656326,"max_cost":2.7312978146699285},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":4000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"cohere/command-r7b-12-2024","alias_ids":null,"forwarded_model_id":null},{"id":"llama-3.3-70b-instruct","name":"Meta: Llama 3.3 70B Instruct","created":1733506137,"description":"The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":"llama3"},"pricing":{"prompt":4.1895000000000006e-08,"completion":1.3406400000000003e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.005491261440000001,"max_completion_cost":0.0021965045760000005,"max_cost":0.007001358336000001},"sats_pricing":{"prompt":5.202472027942721e-05,"completion":0.00016647910489416709,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":6.818984136465083,"max_completion_cost":2.7275936545860335,"max_cost":8.694204773992983},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"meta-llama/llama-3.3-70b-instruct","alias_ids":null,"forwarded_model_id":null},{"id":"nova-lite-v1","name":"Amazon: Nova Lite 1.0","created":1733437363,"description":"Amazon Nova Lite 1.0 is a very low-cost multimodal model from Amazon that focused on fast processing of image, video, and text inputs to generate text output. Amazon Nova Lite...","context_length":300000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Nova","instruct_type":null},"pricing":{"prompt":2.5137e-08,"completion":1.00548e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.007541099999999999,"max_completion_cost":0.00051480576,"max_cost":0.007927204319999999},"sats_pricing":{"prompt":3.121483216765632e-05,"completion":0.00012485932867062528,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":9.364449650296896,"max_completion_cost":0.6392797627936015,"max_cost":9.843909472392097},"per_request_limits":null,"top_provider":{"context_length":300000,"max_completion_tokens":5120,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"amazon/nova-lite-v1","alias_ids":null,"forwarded_model_id":null},{"id":"nova-micro-v1","name":"Amazon: Nova Micro 1.0","created":1733437237,"description":"Amazon Nova Micro 1.0 is a text-only model that delivers the lowest latency responses in the Amazon Nova family of models at a very low cost. With a context length...","context_length":128000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Nova","instruct_type":null},"pricing":{"prompt":1.4663249999999999e-08,"completion":5.8652999999999996e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0018768959999999999,"max_completion_cost":0.00030030336,"max_cost":0.0021021235199999997},"sats_pricing":{"prompt":1.820865209779952e-05,"completion":7.283460839119808e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":2.3307074685183387,"max_completion_cost":0.3729131949629342,"max_cost":2.610392364740539},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":5120,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"amazon/nova-micro-v1","alias_ids":null,"forwarded_model_id":null},{"id":"nova-pro-v1","name":"Amazon: Nova Pro 1.0","created":1733436303,"description":"Amazon Nova Pro 1.0 is a capable multimodal model from Amazon focused on providing a combination of accuracy, speed, and cost for a wide range of tasks. As of December...","context_length":300000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Nova","instruct_type":null},"pricing":{"prompt":3.3516000000000004e-07,"completion":1.3406400000000002e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.10054800000000001,"max_completion_cost":0.0068640768000000005,"max_cost":0.1056960576},"sats_pricing":{"prompt":0.0004161977622354177,"completion":0.0016647910489416707,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":124.8593286706253,"max_completion_cost":8.523730170581354,"max_cost":131.2521262985613},"per_request_limits":null,"top_provider":{"context_length":300000,"max_completion_tokens":5120,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"amazon/nova-pro-v1","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-4o-2024-11-20","name":"OpenAI: GPT-4o (2024-11-20)","created":1732127594,"description":"The 2024-11-20 version of GPT-4o offers a leveled-up creative writing ability with more natural, engaging, and tailored writing to improve relevance & readability. It’s also better at working with uploaded...","context_length":128000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":1.047375e-06,"completion":4.1895e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":4.987500000000001e-07,"input_cache_write":0.0,"max_prompt_cost":0.13406400000000002,"max_completion_cost":0.068640768,"max_cost":0.18554457600000002},"sats_pricing":{"prompt":0.0013006180069856802,"completion":0.005202472027942721,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0006193419080884193,"input_cache_write":0.0,"max_prompt_cost":166.47910489416708,"max_completion_cost":85.23730170581354,"max_cost":230.40708117352722},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-4o-2024-11-20","alias_ids":null,"forwarded_model_id":null},{"id":"mistral-large-2411","name":"Mistral Large 2411","created":1731978685,"description":"Mistral Large 2 2411 is an update of [Mistral Large 2](/mistralai/mistral-large) released together with [Pixtral Large 2411](/mistralai/pixtral-large-2411) It provides a significant upgrade on the previous [Mistral Large 24.07](/mistralai/mistral-large-2407), with notable...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":8.379000000000002e-07,"completion":2.5137e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":7.98e-08,"input_cache_write":0.0,"max_prompt_cost":0.10982522880000002,"max_completion_cost":0.3294756864,"max_cost":0.3294756864},"sats_pricing":{"prompt":0.0010404944055885444,"completion":0.0031214832167656323,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":9.909470529414706e-05,"input_cache_write":0.0,"max_prompt_cost":136.3796827293017,"max_completion_cost":409.13904818790496,"max_cost":409.13904818790496},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"mistralai/mistral-large-2411","alias_ids":null,"forwarded_model_id":null},{"id":"mistral-large-2407","name":"Mistral Large 2407","created":1731978415,"description":"This is Mistral AI's flagship model, Mistral Large 2 (version mistral-large-2407). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/)....","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":8.379000000000002e-07,"completion":2.5137e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":7.98e-08,"input_cache_write":0.0,"max_prompt_cost":0.10982522880000002,"max_completion_cost":0.3294756864,"max_cost":0.3294756864},"sats_pricing":{"prompt":0.0010404944055885444,"completion":0.0031214832167656323,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":9.909470529414706e-05,"input_cache_write":0.0,"max_prompt_cost":136.3796827293017,"max_completion_cost":409.13904818790496,"max_cost":409.13904818790496},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"mistralai/mistral-large-2407","alias_ids":null,"forwarded_model_id":null},{"id":"pixtral-large-2411","name":"Mistral: Pixtral Large 2411","created":1731977388,"description":"Pixtral Large is a 124B parameter, open-weight, multimodal model built on top of [Mistral Large 2](/mistralai/mistral-large-2411). The model is able to understand documents, charts and natural images. The model is...","context_length":131072,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":8.379000000000002e-07,"completion":2.5137e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":7.98e-08,"input_cache_write":0.0,"max_prompt_cost":0.10982522880000002,"max_completion_cost":0.3294756864,"max_cost":0.3294756864},"sats_pricing":{"prompt":0.0010404944055885444,"completion":0.0031214832167656323,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":9.909470529414706e-05,"input_cache_write":0.0,"max_prompt_cost":136.3796827293017,"max_completion_cost":409.13904818790496,"max_cost":409.13904818790496},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"mistralai/pixtral-large-2411","alias_ids":null,"forwarded_model_id":null},{"id":"qwen-2.5-coder-32b-instruct","name":"Qwen2.5 Coder 32B Instruct","created":1731368400,"description":"Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen). Qwen2.5-Coder brings the following improvements upon CodeQwen1.5: - Significantly improvements in **code generation**, **code reasoning**...","context_length":32768,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":"chatml"},"pricing":{"prompt":2.76507e-07,"completion":4.189500000000001e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.009060581376,"max_completion_cost":0.013728153600000003,"max_cost":0.013728153600000003},"sats_pricing":{"prompt":0.00034336315384421955,"completion":0.0005202472027942722,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":11.251323825167386,"max_completion_cost":17.04746034116271,"max_cost":17.04746034116271},"per_request_limits":null,"top_provider":{"context_length":32768,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen-2.5-coder-32b-instruct","alias_ids":null,"forwarded_model_id":null},{"id":"unslopnemo-12b","name":"TheDrummer: UnslopNemo 12B","created":1731103448,"description":"UnslopNemo v4.1 is the latest addition from the creator of Rocinante, designed for adventure writing and role-play scenarios.","context_length":32768,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":"mistral"},"pricing":{"prompt":1.6758000000000002e-07,"completion":1.6758000000000002e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.005491261440000001,"max_completion_cost":0.005491261440000001,"max_cost":0.005491261440000001},"sats_pricing":{"prompt":0.00020809888111770884,"completion":0.00020809888111770884,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":6.818984136465083,"max_completion_cost":6.818984136465083,"max_cost":6.818984136465083},"per_request_limits":null,"top_provider":{"context_length":32768,"max_completion_tokens":32768,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"thedrummer/unslopnemo-12b","alias_ids":null,"forwarded_model_id":null},{"id":"claude-3.5-haiku","name":"Anthropic: Claude 3.5 Haiku","created":1730678400,"description":"Claude 3.5 Haiku features offers enhanced capabilities in speed, coding accuracy, and tool use. Engineered to excel in real-time applications, it delivers quick response times that are essential for dynamic...","context_length":200000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":3.3516000000000004e-07,"completion":1.6758000000000003e-06,"request":0.0,"image":0.0,"web_search":0.0039900000000000005,"internal_reasoning":0.0,"input_cache_read":3.1920000000000004e-08,"input_cache_write":3.99e-07,"max_prompt_cost":0.06703200000000001,"max_completion_cost":0.013728153600000003,"max_cost":0.07801452288},"sats_pricing":{"prompt":0.0004161977622354177,"completion":0.002080988811177089,"request":0.001,"image":0.0,"web_search":4.954735264707353,"internal_reasoning":0.0,"input_cache_read":3.9637882117658825e-05,"input_cache_write":0.0004954735264707354,"max_prompt_cost":83.23955244708354,"max_completion_cost":17.04746034116271,"max_cost":96.8775207200137},"per_request_limits":null,"top_provider":{"context_length":200000,"max_completion_tokens":8192,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"anthropic/claude-3-5-haiku","alias_ids":null,"forwarded_model_id":null},{"id":"magnum-v4-72b","name":"Magnum v4 72B","created":1729555200,"description":"This is a series of models designed to replicate the prose quality of the Claude 3 models, specifically Sonnet(https://openrouter.ai/anthropic/claude-3.5-sonnet) and Opus(https://openrouter.ai/anthropic/claude-3-opus).\n\nThe model is fine-tuned on top of [Qwen2.5 72B](https://openrouter.ai/qwen/qwen-2.5-72b-instruct).","context_length":16384,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":"chatml"},"pricing":{"prompt":1.25685e-06,"completion":2.09475e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0205922304,"max_completion_cost":0.004290048,"max_cost":0.022308249600000003},"sats_pricing":{"prompt":0.0015607416083828162,"completion":0.0026012360139713604,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":25.57119051174406,"max_completion_cost":5.327331356613346,"max_cost":27.702123054389403},"per_request_limits":null,"top_provider":{"context_length":16384,"max_completion_tokens":2048,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"anthracite-org/magnum-v4-72b","alias_ids":null,"forwarded_model_id":null},{"id":"qwen-2.5-7b-instruct","name":"Qwen: Qwen2.5 7B Instruct","created":1729036800,"description":"Qwen2.5 7B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2: - Significantly more knowledge and has greatly improved capabilities in coding and...","context_length":32768,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":"chatml"},"pricing":{"prompt":1.6758000000000004e-08,"completion":4.1895000000000006e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0005491261440000001,"max_completion_cost":0.0013728153600000002,"max_cost":0.0013728153600000002},"sats_pricing":{"prompt":2.0809888111770886e-05,"completion":5.202472027942721e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.6818984136465084,"max_completion_cost":1.7047460341162708,"max_cost":1.7047460341162708},"per_request_limits":null,"top_provider":{"context_length":32768,"max_completion_tokens":32768,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen-2.5-7b-instruct","alias_ids":null,"forwarded_model_id":null},{"id":"llama-3.1-nemotron-70b-instruct","name":"NVIDIA: Llama 3.1 Nemotron 70B Instruct","created":1728950400,"description":"NVIDIA's Llama 3.1 Nemotron 70B is a language model designed for generating precise and useful responses. Leveraging [Llama 3.1 70B](/models/meta-llama/llama-3.1-70b-instruct) architecture and Reinforcement Learning from Human Feedback (RLHF), it excels...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":"llama3"},"pricing":{"prompt":5.0274e-07,"completion":5.0274e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.06589513728,"max_completion_cost":0.00823689216,"max_cost":0.06589513728},"sats_pricing":{"prompt":0.0006242966433531265,"completion":0.0006242966433531265,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":81.827809637581,"max_completion_cost":10.228476204697625,"max_cost":81.827809637581},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"nvidia/llama-3.1-nemotron-70b-instruct","alias_ids":null,"forwarded_model_id":null},{"id":"inflection-3-pi","name":"Inflection: Inflection 3 Pi","created":1728604800,"description":"Inflection 3 Pi powers Inflection's [Pi](https://pi.ai) chatbot, including backstory, emotional intelligence, productivity, and safety. It has access to recent news, and excels in scenarios like customer support and roleplay. Pi...","context_length":8000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":1.047375e-06,"completion":4.1895e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.008379000000000001,"max_completion_cost":0.004290048,"max_cost":0.011596536000000001},"sats_pricing":{"prompt":0.0013006180069856802,"completion":0.005202472027942721,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":10.404944055885442,"max_completion_cost":5.327331356613346,"max_cost":14.400442573345451},"per_request_limits":null,"top_provider":{"context_length":8000,"max_completion_tokens":1024,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"inflection/inflection-3-pi","alias_ids":null,"forwarded_model_id":null},{"id":"inflection-3-productivity","name":"Inflection: Inflection 3 Productivity","created":1728604800,"description":"Inflection 3 Productivity is optimized for following instructions. It is better for tasks requiring JSON output or precise adherence to provided guidelines. It has access to recent news. For emotional...","context_length":8000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":1.047375e-06,"completion":4.1895e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.008379000000000001,"max_completion_cost":0.004290048,"max_cost":0.011596536000000001},"sats_pricing":{"prompt":0.0013006180069856802,"completion":0.005202472027942721,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":10.404944055885442,"max_completion_cost":5.327331356613346,"max_cost":14.400442573345451},"per_request_limits":null,"top_provider":{"context_length":8000,"max_completion_tokens":1024,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"inflection/inflection-3-productivity","alias_ids":null,"forwarded_model_id":null},{"id":"rocinante-12b","name":"TheDrummer: Rocinante 12B","created":1727654400,"description":"Rocinante 12B is designed for engaging storytelling and rich prose. Early testers have reported: - Expanded vocabulary with unique and expressive word choices - Enhanced creativity for vivid narratives -...","context_length":32768,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":"chatml"},"pricing":{"prompt":7.12215e-08,"completion":1.801485e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.002333786112,"max_completion_cost":0.005903106048,"max_cost":0.005903106048},"sats_pricing":{"prompt":8.844202447502624e-05,"completion":0.00022370629720153698,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":2.89806825799766,"max_completion_cost":7.330407946699964,"max_cost":7.330407946699964},"per_request_limits":null,"top_provider":{"context_length":32768,"max_completion_tokens":32768,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"thedrummer/rocinante-12b","alias_ids":null,"forwarded_model_id":null},{"id":"llama-3.2-1b-instruct","name":"Meta: Llama 3.2 1B Instruct","created":1727222400,"description":"Llama 3.2 1B is a 1-billion-parameter language model focused on efficiently performing natural language tasks, such as summarization, dialogue, and multilingual text analysis. Its smaller size allows it to operate...","context_length":60000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":"llama3"},"pricing":{"prompt":1.131165e-08,"completion":8.379000000000001e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.000678699,"max_completion_cost":0.0050274000000000004,"max_cost":0.0050274000000000004},"sats_pricing":{"prompt":1.4046674475445346e-05,"completion":0.00010404944055885442,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.8428004685267206,"max_completion_cost":6.242966433531265,"max_cost":6.242966433531265},"per_request_limits":null,"top_provider":{"context_length":60000,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"meta-llama/llama-3.2-1b-instruct","alias_ids":null,"forwarded_model_id":null},{"id":"llama-3.2-11b-vision-instruct","name":"Meta: Llama 3.2 11B Vision Instruct","created":1727222400,"description":"Llama 3.2 11B Vision is a multimodal model with 11 billion parameters, designed to handle tasks combining visual and textual data. It excels in tasks such as image captioning and...","context_length":131072,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":"llama3"},"pricing":{"prompt":1.0264275000000001e-07,"completion":1.0264275000000001e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.013453590528000001,"max_completion_cost":0.0016816988160000001,"max_cost":0.013453590528000001},"sats_pricing":{"prompt":0.00012746056468459667,"completion":0.00012746056468459667,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":16.706511134339454,"max_completion_cost":2.0883138917924318,"max_cost":16.706511134339454},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"meta-llama/llama-3.2-11b-vision-instruct","alias_ids":null,"forwarded_model_id":null},{"id":"llama-3.2-3b-instruct","name":"Meta: Llama 3.2 3B Instruct","created":1727222400,"description":"Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it...","context_length":80000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":"llama3"},"pricing":{"prompt":2.1366450000000002e-08,"completion":1.42443e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0017093160000000002,"max_completion_cost":0.01139544,"max_cost":0.01139544},"sats_pricing":{"prompt":2.6532607342507875e-05,"completion":0.0001768840489500525,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":2.1226085874006304,"max_completion_cost":14.1507239160042,"max_cost":14.1507239160042},"per_request_limits":null,"top_provider":{"context_length":80000,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"meta-llama/llama-3.2-3b-instruct","alias_ids":null,"forwarded_model_id":null},{"id":"qwen-2.5-72b-instruct","name":"Qwen2.5 72B Instruct","created":1726704000,"description":"Qwen2.5 72B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2: - Significantly more knowledge and has greatly improved capabilities in coding and...","context_length":32768,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":"chatml"},"pricing":{"prompt":1.5082200000000003e-07,"completion":1.6758000000000002e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.004942135296000001,"max_completion_cost":0.0027456307200000004,"max_cost":0.005216698368000001},"sats_pricing":{"prompt":0.00018728899300593797,"completion":0.00020809888111770884,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":6.137085722818576,"max_completion_cost":3.4094920682325416,"max_cost":6.478034929641829},"per_request_limits":null,"top_provider":{"context_length":32768,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"qwen/qwen-2.5-72b-instruct","alias_ids":null,"forwarded_model_id":null},{"id":"command-r-08-2024","name":"Cohere: Command R (08-2024)","created":1724976000,"description":"command-r-08-2024 is an update of the [Command R](/models/cohere/command-r) with improved performance for multilingual retrieval-augmented generation (RAG) and tool use. More broadly, it is better at math, code and reasoning and...","context_length":128000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Cohere","instruct_type":null},"pricing":{"prompt":6.28425e-08,"completion":2.5137e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.00804384,"max_completion_cost":0.00100548,"max_cost":0.00879795},"sats_pricing":{"prompt":7.803708041914081e-05,"completion":0.00031214832167656326,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":9.988746293650024,"max_completion_cost":1.248593286706253,"max_cost":10.925191258679714},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":4000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"cohere/command-r-08-2024","alias_ids":null,"forwarded_model_id":null},{"id":"command-r-plus-08-2024","name":"Cohere: Command R+ (08-2024)","created":1724976000,"description":"command-r-plus-08-2024 is an update of the [Command R+](/models/cohere/command-r-plus) with roughly 50% higher throughput and 25% lower latencies as compared to the previous Command R+ version, while keeping the hardware footprint...","context_length":128000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Cohere","instruct_type":null},"pricing":{"prompt":1.047375e-06,"completion":4.1895e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.13406400000000002,"max_completion_cost":0.016758000000000002,"max_cost":0.1466325},"sats_pricing":{"prompt":0.0013006180069856802,"completion":0.005202472027942721,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":166.47910489416708,"max_completion_cost":20.809888111770885,"max_cost":182.0865209779952},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":4000,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"cohere/command-r-plus-08-2024","alias_ids":null,"forwarded_model_id":null},{"id":"l3.1-euryale-70b","name":"Sao10K: Llama 3.1 Euryale 70B v2.2","created":1724803200,"description":"Euryale L3.1 70B v2.2 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.1](/models/sao10k/l3-euryale-70b).","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":"llama3"},"pricing":{"prompt":3.561075e-07,"completion":3.561075e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.04667572224,"max_completion_cost":0.00583446528,"max_cost":0.04667572224},"sats_pricing":{"prompt":0.00044221012237513125,"completion":0.00044221012237513125,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":57.9613651599532,"max_completion_cost":7.24517064499415,"max_cost":57.9613651599532},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"sao10k/l3.1-euryale-70b","alias_ids":null,"forwarded_model_id":null},{"id":"hermes-3-llama-3.1-70b","name":"Nous: Hermes 3 70B Instruct","created":1723939200,"description":"Hermes 3 is a generalist language model with many improvements over [Hermes 2](/models/nousresearch/nous-hermes-2-mistral-7b-dpo), including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":"chatml"},"pricing":{"prompt":1.25685e-07,"completion":1.25685e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.01647378432,"max_completion_cost":0.00205922304,"max_cost":0.01647378432},"sats_pricing":{"prompt":0.00015607416083828163,"completion":0.00015607416083828163,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":20.45695240939525,"max_completion_cost":2.557119051174406,"max_cost":20.45695240939525},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"nousresearch/hermes-3-llama-3.1-70b","alias_ids":null,"forwarded_model_id":null},{"id":"hermes-3-llama-3.1-405b","name":"Nous: Hermes 3 405B Instruct","created":1723766400,"description":"Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":"chatml"},"pricing":{"prompt":4.189500000000001e-07,"completion":4.189500000000001e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.05491261440000001,"max_completion_cost":0.006864076800000001,"max_cost":0.05491261440000001},"sats_pricing":{"prompt":0.0005202472027942722,"completion":0.0005202472027942722,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":68.18984136465085,"max_completion_cost":8.523730170581356,"max_cost":68.18984136465085},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"nousresearch/hermes-3-llama-3.1-405b","alias_ids":null,"forwarded_model_id":null},{"id":"l3-lunaris-8b","name":"Sao10K: Llama 3 8B Lunaris","created":1723507200,"description":"Lunaris 8B is a versatile generalist and roleplaying model based on Llama 3. It's a strategic merge of multiple models, designed to balance creativity with improved logic and general knowledge....","context_length":8192,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":"llama3"},"pricing":{"prompt":1.6758000000000004e-08,"completion":2.0947500000000003e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.00013728153600000003,"max_completion_cost":0.00017160192000000002,"max_cost":0.00017160192000000002},"sats_pricing":{"prompt":2.0809888111770886e-05,"completion":2.6012360139713605e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.1704746034116271,"max_completion_cost":0.21309325426453385,"max_cost":0.21309325426453385},"per_request_limits":null,"top_provider":{"context_length":8192,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"sao10k/l3-lunaris-8b","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-4o-2024-08-06","name":"OpenAI: GPT-4o (2024-08-06)","created":1722902400,"description":"The 2024-08-06 version of GPT-4o offers improved performance in structured outputs, with the ability to supply a JSON schema in the respone_format. Read more [here](https://openai.com/index/introducing-structured-outputs-in-the-api/). GPT-4o (\"o\" for \"omni\") is...","context_length":128000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":1.047375e-06,"completion":4.1895e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":4.987500000000001e-07,"input_cache_write":0.0,"max_prompt_cost":0.13406400000000002,"max_completion_cost":0.068640768,"max_cost":0.18554457600000002},"sats_pricing":{"prompt":0.0013006180069856802,"completion":0.005202472027942721,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0006193419080884193,"input_cache_write":0.0,"max_prompt_cost":166.47910489416708,"max_completion_cost":85.23730170581354,"max_cost":230.40708117352722},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-4o-2024-08-06","alias_ids":null,"forwarded_model_id":null},{"id":"llama-3.1-8b-instruct","name":"Meta: Llama 3.1 8B Instruct","created":1721692800,"description":"Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient. It has demonstrated strong performance compared to...","context_length":16384,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":"llama3"},"pricing":{"prompt":8.379000000000002e-09,"completion":2.0947500000000003e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.00013728153600000003,"max_completion_cost":0.00034320384000000005,"max_cost":0.00034320384000000005},"sats_pricing":{"prompt":1.0404944055885443e-05,"completion":2.6012360139713605e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.1704746034116271,"max_completion_cost":0.4261865085290677,"max_cost":0.4261865085290677},"per_request_limits":null,"top_provider":{"context_length":16384,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"meta-llama/llama-3.1-8b-instruct","alias_ids":null,"forwarded_model_id":null},{"id":"llama-3.1-70b-instruct","name":"Meta: Llama 3.1 70B Instruct","created":1721692800,"description":"Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 70B instruct-tuned version is optimized for high quality dialogue usecases. It has demonstrated strong...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":"llama3"},"pricing":{"prompt":1.6758000000000002e-07,"completion":1.6758000000000002e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.021965045760000003,"max_completion_cost":0.0027456307200000004,"max_cost":0.021965045760000003},"sats_pricing":{"prompt":0.00020809888111770884,"completion":0.00020809888111770884,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":27.275936545860333,"max_completion_cost":3.4094920682325416,"max_cost":27.275936545860333},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"meta-llama/llama-3.1-70b-instruct","alias_ids":null,"forwarded_model_id":null},{"id":"mistral-nemo","name":"Mistral: Mistral Nemo","created":1721347200,"description":"A 12B parameter model with a 128k token context length built by Mistral in collaboration with NVIDIA. The model is multilingual, supporting English, French, German, Spanish, Italian, Portuguese, Chinese, Japanese,...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":"mistral"},"pricing":{"prompt":8.379000000000002e-09,"completion":1.25685e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0010982522880000002,"max_completion_cost":0.001647378432,"max_cost":0.001647378432},"sats_pricing":{"prompt":1.0404944055885443e-05,"completion":1.560741608382816e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":1.3637968272930168,"max_completion_cost":2.0456952409395246,"max_cost":2.0456952409395246},"per_request_limits":null,"top_provider":{"context_length":131072,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"mistralai/mistral-nemo","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-4o-mini-2024-07-18","name":"OpenAI: GPT-4o-mini (2024-07-18)","created":1721260800,"description":"GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs. As their most advanced small model, it is many multiples more affordable...","context_length":128000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":6.28425e-08,"completion":2.5137e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":2.9925e-08,"input_cache_write":0.0,"max_prompt_cost":0.00804384,"max_completion_cost":0.00411844608,"max_cost":0.01113267456},"sats_pricing":{"prompt":7.803708041914081e-05,"completion":0.00031214832167656326,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":3.7160514485305145e-05,"input_cache_write":0.0,"max_prompt_cost":9.988746293650024,"max_completion_cost":5.114238102348812,"max_cost":13.824424870411631},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-4o-mini-2024-07-18","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-4o-mini","name":"OpenAI: GPT-4o-mini","created":1721260800,"description":"GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs. As their most advanced small model, it is many multiples more affordable...","context_length":128000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":6.28425e-08,"completion":2.5137e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":2.9925e-08,"input_cache_write":0.0,"max_prompt_cost":0.00804384,"max_completion_cost":0.00411844608,"max_cost":0.01113267456},"sats_pricing":{"prompt":7.803708041914081e-05,"completion":0.00031214832167656326,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":3.7160514485305145e-05,"input_cache_write":0.0,"max_prompt_cost":9.988746293650024,"max_completion_cost":5.114238102348812,"max_cost":13.824424870411631},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-4o-mini","alias_ids":null,"forwarded_model_id":null},{"id":"gemma-2-27b-it","name":"Google: Gemma 2 27B","created":1720828800,"description":"Gemma 2 27B by Google is an open model built from the same research and technology used to create the [Gemini models](/models?q=gemini). Gemma models are well-suited for a variety of...","context_length":8192,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":"gemma"},"pricing":{"prompt":2.7231750000000003e-07,"completion":2.7231750000000003e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0022308249600000002,"max_completion_cost":0.0005577062400000001,"max_cost":0.0022308249600000002},"sats_pricing":{"prompt":0.0003381606818162769,"completion":0.0003381606818162769,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":2.7702123054389403,"max_completion_cost":0.6925530763597351,"max_cost":2.7702123054389403},"per_request_limits":null,"top_provider":{"context_length":8192,"max_completion_tokens":2048,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"google/gemma-2-27b-it","alias_ids":null,"forwarded_model_id":null},{"id":"l3-euryale-70b","name":"Sao10k: Llama 3 Euryale 70B v2.1","created":1718668800,"description":"Euryale 70B v2.1 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). - Better prompt adherence. - Better anatomy / spatial awareness. - Adapts much better to unique and custom...","context_length":8192,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":"llama3"},"pricing":{"prompt":6.200460000000001e-07,"completion":6.200460000000001e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.005079416832000001,"max_completion_cost":0.005079416832000001,"max_cost":0.005079416832000001},"sats_pricing":{"prompt":0.0007699658601355228,"completion":0.0007699658601355228,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":6.3075603262302025,"max_completion_cost":6.3075603262302025,"max_cost":6.3075603262302025},"per_request_limits":null,"top_provider":{"context_length":8192,"max_completion_tokens":8192,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"sao10k/l3-euryale-70b","alias_ids":null,"forwarded_model_id":null},{"id":"hermes-2-pro-llama-3-8b","name":"NousResearch: Hermes 2 Pro - Llama-3 8B","created":1716768000,"description":"Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced...","context_length":8192,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":"chatml"},"pricing":{"prompt":5.8652999999999996e-08,"completion":5.8652999999999996e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.00048048537599999997,"max_completion_cost":0.00048048537599999997,"max_cost":0.00048048537599999997},"sats_pricing":{"prompt":7.283460839119808e-05,"completion":7.283460839119808e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.5966611119406947,"max_completion_cost":0.5966611119406947,"max_cost":0.5966611119406947},"per_request_limits":null,"top_provider":{"context_length":8192,"max_completion_tokens":8192,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"nousresearch/hermes-2-pro-llama-3-8b","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-4o","name":"OpenAI: GPT-4o","created":1715558400,"description":"GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as...","context_length":128000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":1.047375e-06,"completion":4.1895e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.13406400000000002,"max_completion_cost":0.068640768,"max_cost":0.18554457600000002},"sats_pricing":{"prompt":0.0013006180069856802,"completion":0.005202472027942721,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":166.47910489416708,"max_completion_cost":85.23730170581354,"max_cost":230.40708117352722},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-4o","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-4o-2024-05-13","name":"OpenAI: GPT-4o (2024-05-13)","created":1715558400,"description":"GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as...","context_length":128000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":2.09475e-06,"completion":6.28425e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.26812800000000003,"max_completion_cost":0.025740288,"max_cost":0.285288192},"sats_pricing":{"prompt":0.0026012360139713604,"completion":0.007803708041914081,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":332.95820978833416,"max_completion_cost":31.963988139680076,"max_cost":354.2675352147875},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":4096,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-4o-2024-05-13","alias_ids":null,"forwarded_model_id":null},{"id":"llama-3-70b-instruct","name":"Meta: Llama 3 70B Instruct","created":1713398400,"description":"Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases. It has demonstrated strong...","context_length":8192,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":"llama3"},"pricing":{"prompt":2.1366450000000001e-07,"completion":3.1002300000000005e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0017503395840000001,"max_completion_cost":0.0024801840000000003,"max_cost":0.0025212075840000004},"sats_pricing":{"prompt":0.00026532607342507876,"completion":0.0003849829300677614,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":2.173551193498245,"max_completion_cost":3.079863440542091,"max_cost":3.130806046639706},"per_request_limits":null,"top_provider":{"context_length":8192,"max_completion_tokens":8000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"meta-llama/llama-3-70b-instruct","alias_ids":null,"forwarded_model_id":null},{"id":"llama-3-8b-instruct","name":"Meta: Llama 3 8B Instruct","created":1713398400,"description":"Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases. It has demonstrated strong...","context_length":8192,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":"llama3"},"pricing":{"prompt":1.25685e-08,"completion":1.6758000000000004e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.000102961152,"max_completion_cost":0.00013728153600000003,"max_cost":0.00013728153600000003},"sats_pricing":{"prompt":1.560741608382816e-05,"completion":2.0809888111770886e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.1278559525587203,"max_completion_cost":0.1704746034116271,"max_cost":0.1704746034116271},"per_request_limits":null,"top_provider":{"context_length":8192,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"meta-llama/llama-3-8b-instruct","alias_ids":null,"forwarded_model_id":null},{"id":"mixtral-8x22b-instruct","name":"Mistral: Mixtral 8x22B Instruct","created":1713312000,"description":"Mistral's official instruct fine-tuned version of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b). It uses 39B active parameters out of 141B, offering unparalleled cost efficiency for its size. Its strengths include: - strong math, coding,...","context_length":65536,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":"mistral"},"pricing":{"prompt":8.379000000000002e-07,"completion":2.5137e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":7.98e-08,"input_cache_write":0.0,"max_prompt_cost":0.05491261440000001,"max_completion_cost":0.1647378432,"max_cost":0.1647378432},"sats_pricing":{"prompt":0.0010404944055885444,"completion":0.0031214832167656323,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":9.909470529414706e-05,"input_cache_write":0.0,"max_prompt_cost":68.18984136465085,"max_completion_cost":204.56952409395248,"max_cost":204.56952409395248},"per_request_limits":null,"top_provider":{"context_length":65536,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"mistralai/mixtral-8x22b-instruct","alias_ids":null,"forwarded_model_id":null},{"id":"wizardlm-2-8x22b","name":"WizardLM-2 8x22B","created":1713225600,"description":"WizardLM-2 8x22B is Microsoft AI's most advanced Wizard model. It demonstrates highly competitive performance compared to leading proprietary models, and it consistently outperforms all existing state-of-the-art opensource models. It is...","context_length":65535,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":"vicuna"},"pricing":{"prompt":2.59749e-07,"completion":2.59749e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.017022650715,"max_completion_cost":0.002077992,"max_cost":0.017022650715},"sats_pricing":{"prompt":0.0003225532657324487,"completion":0.0003225532657324487,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":21.138528269776025,"max_completion_cost":2.580426125859589,"max_cost":21.138528269776025},"per_request_limits":null,"top_provider":{"context_length":65535,"max_completion_tokens":8000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"microsoft/wizardlm-2-8x22b","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-4-turbo","name":"OpenAI: GPT-4 Turbo","created":1712620800,"description":"The latest GPT-4 Turbo model with vision capabilities. Vision requests can now use JSON mode and function calling.\n\nTraining data: up to December 2023.","context_length":128000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":4.1895e-06,"completion":1.25685e-05,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.5362560000000001,"max_completion_cost":0.051480576,"max_cost":0.570576384},"sats_pricing":{"prompt":0.005202472027942721,"completion":0.015607416083828162,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":665.9164195766683,"max_completion_cost":63.92797627936015,"max_cost":708.535070429575},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":4096,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-4-turbo","alias_ids":null,"forwarded_model_id":null},{"id":"claude-3-haiku","name":"Anthropic: Claude 3 Haiku","created":1710288000,"description":"Claude 3 Haiku is Anthropic's fastest and most compact model for\nnear-instant responsiveness. Quick and accurate targeted performance.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-haiku)\n\n#multimodal","context_length":200000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":1.0473750000000002e-07,"completion":5.236875e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.1969999999999999e-08,"input_cache_write":1.197e-07,"max_prompt_cost":0.020947500000000004,"max_completion_cost":0.002145024,"max_cost":0.022663519200000003},"sats_pricing":{"prompt":0.00013006180069856805,"completion":0.0006503090034928401,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":1.4864205794122058e-05,"input_cache_write":0.00014864205794122058,"max_prompt_cost":26.012360139713607,"max_completion_cost":2.663665678306673,"max_cost":28.143292682358943},"per_request_limits":null,"top_provider":{"context_length":200000,"max_completion_tokens":4096,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"anthropic/claude-3-haiku","alias_ids":null,"forwarded_model_id":null},{"id":"mistral-large","name":"Mistral Large","created":1708905600,"description":"This is Mistral AI's flagship model, Mistral Large 2 (version `mistral-large-2407`). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/)....","context_length":128000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":8.379000000000002e-07,"completion":2.5137e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":7.98e-08,"input_cache_write":0.0,"max_prompt_cost":0.10725120000000002,"max_completion_cost":0.32175360000000003,"max_cost":0.32175360000000003},"sats_pricing":{"prompt":0.0010404944055885444,"completion":0.0031214832167656323,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":9.909470529414706e-05,"input_cache_write":0.0,"max_prompt_cost":133.18328391533367,"max_completion_cost":399.54985174600097,"max_cost":399.54985174600097},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"mistralai/mistral-large","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-4-turbo-preview","name":"OpenAI: GPT-4 Turbo Preview","created":1706140800,"description":"The preview GPT-4 model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more. Training data: up to Dec 2023. **Note:** heavily rate limited by OpenAI while...","context_length":128000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":4.1895e-06,"completion":1.25685e-05,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.5362560000000001,"max_completion_cost":0.051480576,"max_cost":0.570576384},"sats_pricing":{"prompt":0.005202472027942721,"completion":0.015607416083828162,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":665.9164195766683,"max_completion_cost":63.92797627936015,"max_cost":708.535070429575},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":4096,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-4-turbo-preview","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-3.5-turbo-0613","name":"OpenAI: GPT-3.5 Turbo (older v0613)","created":1706140800,"description":"GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.\n\nTraining data up to Sep 2021.","context_length":4095,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":4.189500000000001e-07,"completion":8.379000000000002e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0017156002500000004,"max_completion_cost":0.0034312005000000007,"max_cost":0.0034312005000000007},"sats_pricing":{"prompt":0.0005202472027942722,"completion":0.0010404944055885444,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":2.1304122954425444,"max_completion_cost":4.260824590885089,"max_cost":4.260824590885089},"per_request_limits":null,"top_provider":{"context_length":4095,"max_completion_tokens":4096,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-3.5-turbo-0613","alias_ids":null,"forwarded_model_id":null},{"id":"mixtral-8x7b-instruct","name":"Mistral: Mixtral 8x7B Instruct","created":1702166400,"description":"Mixtral 8x7B Instruct is a pretrained generative Sparse Mixture of Experts, by Mistral AI, for chat and instruction use. Incorporates 8 experts (feed-forward networks) for a total of 47 billion...","context_length":32768,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":"mistral"},"pricing":{"prompt":2.2623299999999999e-07,"completion":2.2623299999999999e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0074132029439999995,"max_completion_cost":0.0037066014719999998,"max_cost":0.0074132029439999995},"sats_pricing":{"prompt":0.0002809334895089069,"completion":0.0002809334895089069,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":9.205628584227862,"max_completion_cost":4.602814292113931,"max_cost":9.205628584227862},"per_request_limits":null,"top_provider":{"context_length":32768,"max_completion_tokens":16384,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"mistralai/mixtral-8x7b-instruct","alias_ids":null,"forwarded_model_id":null},{"id":"goliath-120b","name":"Goliath 120B","created":1699574400,"description":"A large LLM created by combining two fine-tuned Llama 70B models into one 120B model. Combines Xwin and Euryale. Credits to - [@chargoddard](https://huggingface.co/chargoddard) for developing the framework used to merge...","context_length":6144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama2","instruct_type":"airoboros"},"pricing":{"prompt":1.5710625e-06,"completion":3.142125e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.009652608,"max_completion_cost":0.003217536,"max_cost":0.011261376},"sats_pricing":{"prompt":0.0019509270104785203,"completion":0.0039018540209570405,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":11.986495552380028,"max_completion_cost":3.9954985174600095,"max_cost":13.984244811110033},"per_request_limits":null,"top_provider":{"context_length":6144,"max_completion_tokens":1024,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"alpindale/goliath-120b","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-4-1106-preview","name":"OpenAI: GPT-4 Turbo (older v1106)","created":1699228800,"description":"The latest GPT-4 Turbo model with vision capabilities. Vision requests can now use JSON mode and function calling.\n\nTraining data: up to April 2023.","context_length":128000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":4.1895e-06,"completion":1.25685e-05,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.5362560000000001,"max_completion_cost":0.051480576,"max_cost":0.570576384},"sats_pricing":{"prompt":0.005202472027942721,"completion":0.015607416083828162,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":665.9164195766683,"max_completion_cost":63.92797627936015,"max_cost":708.535070429575},"per_request_limits":null,"top_provider":{"context_length":128000,"max_completion_tokens":4096,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-4-1106-preview","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-3.5-turbo-instruct","name":"OpenAI: GPT-3.5 Turbo Instruct","created":1695859200,"description":"This model is a variant of GPT-3.5 Turbo tuned for instructional prompts and omitting chat-related optimizations. Training data: up to Sep 2021.","context_length":4095,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":"chatml"},"pricing":{"prompt":6.28425e-07,"completion":8.379000000000002e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0025734003750000002,"max_completion_cost":0.0034312005000000007,"max_cost":0.0034312005000000007},"sats_pricing":{"prompt":0.0007803708041914081,"completion":0.0010404944055885444,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":3.1956184431638164,"max_completion_cost":4.260824590885089,"max_cost":4.260824590885089},"per_request_limits":null,"top_provider":{"context_length":4095,"max_completion_tokens":4096,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-3.5-turbo-instruct","alias_ids":null,"forwarded_model_id":null},{"id":"mistral-7b-instruct-v0.1","name":"Mistral: Mistral 7B Instruct v0.1","created":1695859200,"description":"A 7.3B parameter model that outperforms Llama 2 13B on all benchmarks, with optimizations for speed and context length.","context_length":2824,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":"mistral"},"pricing":{"prompt":4.6084500000000005e-08,"completion":7.960050000000001e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.000130142628,"max_completion_cost":0.00022479181200000005,"max_cost":0.00022479181200000005},"sats_pricing":{"prompt":5.722719230736993e-05,"completion":9.88469685309117e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.1616095910760127,"max_completion_cost":0.27914383913129465,"max_cost":0.27914383913129465},"per_request_limits":null,"top_provider":{"context_length":2824,"max_completion_tokens":null,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"mistralai/mistral-7b-instruct-v0.1","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-3.5-turbo-16k","name":"OpenAI: GPT-3.5 Turbo 16k","created":1693180800,"description":"This model offers four times the context length of gpt-3.5-turbo, allowing it to support approximately 20 pages of text in a single request at a higher cost. Training data: up...","context_length":16385,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":1.25685e-06,"completion":1.6758000000000003e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.02059348725,"max_completion_cost":0.006864076800000001,"max_cost":0.022309506450000002},"sats_pricing":{"prompt":0.0015607416083828162,"completion":0.002080988811177089,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":25.572751253352443,"max_completion_cost":8.523730170581356,"max_cost":27.703683795997783},"per_request_limits":null,"top_provider":{"context_length":16385,"max_completion_tokens":4096,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-3.5-turbo-16k","alias_ids":null,"forwarded_model_id":null},{"id":"weaver","name":"Mancer: Weaver (alpha)","created":1690934400,"description":"An attempt to recreate Claude-style verbosity, but don't expect the same level of coherence or memory. Meant for use in roleplay/narrative situations.","context_length":8000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama2","instruct_type":"alpaca"},"pricing":{"prompt":3.142125e-07,"completion":4.189500000000001e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0025137000000000002,"max_completion_cost":0.0008379000000000001,"max_cost":0.0027231750000000004},"sats_pricing":{"prompt":0.00039018540209570404,"completion":0.0005202472027942722,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":3.1214832167656326,"max_completion_cost":1.0404944055885443,"max_cost":3.3816068181627688},"per_request_limits":null,"top_provider":{"context_length":8000,"max_completion_tokens":2000,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"mancer/weaver","alias_ids":null,"forwarded_model_id":null},{"id":"remm-slerp-l2-13b","name":"ReMM SLERP 13B","created":1689984000,"description":"A recreation trial of the original MythoMax-L2-B13 but with updated models. #merge","context_length":6144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama2","instruct_type":"alpaca"},"pricing":{"prompt":1.885275e-07,"completion":2.7231750000000003e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.00115831296,"max_completion_cost":0.0011154124800000001,"max_cost":0.0015015168},"sats_pricing":{"prompt":0.00023411124125742241,"completion":0.0003381606818162769,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":1.4383794662856033,"max_completion_cost":1.3851061527194701,"max_cost":1.8645659748146712},"per_request_limits":null,"top_provider":{"context_length":6144,"max_completion_tokens":4096,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"undi95/remm-slerp-l2-13b","alias_ids":null,"forwarded_model_id":null},{"id":"mythomax-l2-13b","name":"MythoMax 13B","created":1688256000,"description":"One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge","context_length":4096,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama2","instruct_type":"alpaca"},"pricing":{"prompt":2.5137e-08,"completion":2.5137e-08,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.000102961152,"max_completion_cost":0.000102961152,"max_cost":0.000102961152},"sats_pricing":{"prompt":3.121483216765632e-05,"completion":3.121483216765632e-05,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.1278559525587203,"max_completion_cost":0.1278559525587203,"max_cost":0.1278559525587203},"per_request_limits":null,"top_provider":{"context_length":4096,"max_completion_tokens":4096,"is_moderated":false},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"gryphe/mythomax-l2-13b","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-4","name":"OpenAI: GPT-4","created":1685232000,"description":"OpenAI's flagship model, GPT-4 is a large-scale multimodal language model capable of solving difficult problems with greater accuracy than previous models due to its broader general knowledge and advanced reasoning...","context_length":8191,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":1.25685e-05,"completion":2.5137e-05,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.1029485835,"max_completion_cost":0.102961152,"max_cost":0.1544291595},"sats_pricing":{"prompt":0.015607416083828162,"completion":0.031214832167656324,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":127.84034514263647,"max_completion_cost":127.8559525587203,"max_cost":191.7683214219966},"per_request_limits":null,"top_provider":{"context_length":8191,"max_completion_tokens":4096,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-4","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-3.5-turbo","name":"OpenAI: GPT-3.5 Turbo","created":1685232000,"description":"GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.\n\nTraining data up to Sep 2021.","context_length":16385,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":2.0947500000000004e-07,"completion":6.28425e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.0034322478750000007,"max_completion_cost":0.0025740288,"max_cost":0.005148267075000001},"sats_pricing":{"prompt":0.0002601236013971361,"completion":0.0007803708041914081,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":4.2621252088920745,"max_completion_cost":3.1963988139680075,"max_cost":6.393057751537413},"per_request_limits":null,"top_provider":{"context_length":16385,"max_completion_tokens":4096,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-3.5-turbo","alias_ids":null,"forwarded_model_id":null},{"id":"gpt-4-0314","name":"OpenAI: GPT-4 (older v0314)","created":1685232000,"description":"GPT-4-0314 is the first version of GPT-4 released, with a context length of 8,192 tokens, and was supported until June 14. Training data: up to Sep 2021.","context_length":8191,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":1.25685e-05,"completion":2.5137e-05,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.1029485835,"max_completion_cost":0.102961152,"max_cost":0.1544291595},"sats_pricing":{"prompt":0.015607416083828162,"completion":0.031214832167656324,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":127.84034514263647,"max_completion_cost":127.8559525587203,"max_cost":191.7683214219966},"per_request_limits":null,"top_provider":{"context_length":8191,"max_completion_tokens":4096,"is_moderated":true},"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":"openai/gpt-4-0314","alias_ids":null,"forwarded_model_id":null},{"id":"autoclaw","name":"🔥 AutoClaw (for OpenClaw)","created":1777854613,"description":"PPQ.AI model","context_length":200000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Unknown","instruct_type":null},"pricing":{"prompt":-3.99e-07,"completion":-3.99e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":-0.0798,"max_completion_cost":-0.0798,"max_cost":-0.0798},"sats_pricing":{"prompt":-0.0004954735264707354,"completion":-0.0004954735264707354,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":-99.09470529414705,"max_completion_cost":-99.09470529414705,"max_cost":0.001},"per_request_limits":null,"top_provider":null,"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":null,"alias_ids":null,"forwarded_model_id":null},{"id":"kimi-k2-5","name":"Kimi K2.5 (Private via TEE)","created":1777854613,"description":"PPQ.AI model","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Unknown","instruct_type":null},"pricing":{"prompt":6.284250000000001e-07,"completion":2.1994875000000003e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.16473784320000004,"max_completion_cost":0.5765824512000001,"max_cost":0.5765824512000001},"sats_pricing":{"prompt":0.0007803708041914083,"completion":0.0027312978146699285,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":204.56952409395254,"max_completion_cost":715.9933343288337,"max_cost":715.9933343288337},"per_request_limits":null,"top_provider":null,"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":null,"alias_ids":null,"forwarded_model_id":null},{"id":"llama3-3-70b","name":"Llama 3.3 70B (Private via TEE)","created":1777854613,"description":"PPQ.AI model","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Unknown","instruct_type":null},"pricing":{"prompt":7.331625000000002e-07,"completion":1.1521125000000002e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.09609707520000002,"max_completion_cost":0.15100968960000002,"max_cost":0.15100968960000002},"sats_pricing":{"prompt":0.0009104326048899763,"completion":0.0014306798076842483,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":119.33222238813897,"max_completion_cost":187.5220637527898,"max_cost":187.5220637527898},"per_request_limits":null,"top_provider":null,"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":null,"alias_ids":null,"forwarded_model_id":null},{"id":"qwen3-vl-30b","name":"Qwen3-VL 30B (Private via TEE)","created":1777854613,"description":"PPQ.AI model","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Unknown","instruct_type":null},"pricing":{"prompt":5.236875e-07,"completion":1.6758000000000003e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.137281536,"max_completion_cost":0.4393009152000001,"max_cost":0.4393009152000001},"sats_pricing":{"prompt":0.0006503090034928401,"completion":0.002080988811177089,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":170.47460341162707,"max_completion_cost":545.5187309172068,"max_cost":545.5187309172068},"per_request_limits":null,"top_provider":null,"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":null,"alias_ids":null,"forwarded_model_id":null},{"id":"glm-5-1","name":"GLM-5.1 (Private via TEE)","created":1777854613,"description":"PPQ.AI model","context_length":202752,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Unknown","instruct_type":null},"pricing":{"prompt":6.284250000000001e-07,"completion":2.1994875000000003e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.12741442560000002,"max_completion_cost":0.44595048960000006,"max_cost":0.44595048960000006},"sats_pricing":{"prompt":0.0007803708041914083,"completion":0.0027312978146699285,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":158.2217412914164,"max_completion_cost":553.7760945199574,"max_cost":553.7760945199574},"per_request_limits":null,"top_provider":null,"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":null,"alias_ids":null,"forwarded_model_id":null},{"id":"gemma4-31b","name":"Gemma 4 31B (Private via TEE)","created":1777854613,"description":"PPQ.AI model","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Unknown","instruct_type":null},"pricing":{"prompt":1.8852750000000002e-07,"completion":4.189500000000001e-07,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.049421352960000005,"max_completion_cost":0.10982522880000002,"max_cost":0.10982522880000002},"sats_pricing":{"prompt":0.00023411124125742244,"completion":0.0005202472027942722,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":61.37085722818575,"max_completion_cost":136.3796827293017,"max_cost":136.3796827293017},"per_request_limits":null,"top_provider":null,"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":null,"alias_ids":null,"forwarded_model_id":null},{"id":"kimi-k2-6","name":"Kimi K2.6 (Private via TEE)","created":1777854613,"description":"PPQ.AI model","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Unknown","instruct_type":null},"pricing":{"prompt":6.284250000000001e-07,"completion":2.1994875000000003e-06,"request":0.0,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":0.16473784320000004,"max_completion_cost":0.5765824512000001,"max_cost":0.5765824512000001},"sats_pricing":{"prompt":0.0007803708041914083,"completion":0.0027312978146699285,"request":0.001,"image":0.0,"web_search":0.0,"internal_reasoning":0.0,"input_cache_read":0.0,"input_cache_write":0.0,"max_prompt_cost":204.56952409395254,"max_completion_cost":715.9933343288337,"max_cost":715.9933343288337},"per_request_limits":null,"top_provider":null,"enabled":true,"upstream_provider_id":"ppqai","canonical_slug":null,"alias_ids":null,"forwarded_model_id":null}]}