Chat (Fireworks AI - mixtral-8x7b-instruct)

POST https://api.fireworks.ai/inference/v1/chat/completions

Request Body

{"messages"=>[{"role"=>"user", "content"=>"Explain the importance of low latency LLMs"}], "model"=>"accounts/fireworks/models/mixtral-8x7b-instruct", "temperature"=>0.5, "max_tokens"=>1024, "top_p"=>1, "stream"=>true, "stop"=>nil}

RESPONSES

status: OK

"data: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Low lat\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"ency Large Language Models (LLMs) are important for a variety of applications due to their ability\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" to quickly process and respond to user inputs. Some of the key benefits of low latency LLMs\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" include:\\n\\n1. Improved user experience: Low latency LLMs can provide a\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" more responsive and fluid user experience, as\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" they are able to quickly process and respond to user inputs. This is particularly important for real-time applications such\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" as chatbots, virtual assistants, and other interactive systems.\\n\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"2. Increased efficiency: Low latency LLMs can process large amounts of data more quickly\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\", leading to increased efficiency and throughput. This is particularly important for applications that require\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" real-time processing of large amounts of data, such as natural language processing, machine translation,\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" and speech recognition.\\n3. Enhanced accuracy: Low latency LLMs can also improve the\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" accuracy of their predictions and responses by taking into account the most recent user inputs\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\". This is because they are able to process and respond to user inputs more quickly, reducing the\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" likelihood of errors or\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" outdated information.\\n4. Better integration with other systems: Low latency LLMs can more\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" easily integrate with other systems and services, as they are able to quickly process and respond to requests.\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" This is particularly important for applications that require real-time communication with other systems, such as databases\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\", APIs, and other microservices.\\n5. Improved scalability: Low latency\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" LLMs can also be more easily scaled to handle large numbers of concurrent\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" users or requests. This is because they are able to quickly process and respond to requests, reducing the\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" amount of time and resources required to handle each request.\\n\\nOverall, low\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" latency LLMs are important for a variety of applications due to their ability to quickly process and respond to user\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" inputs, leading to improved user experience, increased efficiency, enhanced accuracy, better integration with other systems, and improved\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" scalability.\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":18,\"total_tokens\":420,\"completion_tokens\":402}}\n\ndata: [DONE]\n\n"