Chat (Fireworks AI - mixtral-8x7b-instruct)

POST https://api.fireworks.ai/inference/v1/chat/completions

Request Body

{"messages"=>[{"role"=>"user", "content"=>"Explain the importance of low latency LLMs"}], "model"=>"accounts/fireworks/models/mixtral-8x7b-instruct", "temperature"=>0.5, "max_tokens"=>1024, "top_p"=>1, "stream"=>true, "stop"=>nil}

RESPONSES

status: OK

"data: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Low lat\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"ency Large Language Models (LLMs) are important for a variety of applications due to their ability\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" to quickly process and respond to user inputs. Some of the key benefits of low latency LLMs\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" include:\\n\\n1. Improved user experience: Low latency LLMs can provide a\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" more responsive and fluid user experience, as\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" they are able to quickly process and respond to user inputs. This is particularly important for real-time applications such\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" as chatbots, virtual assistants, and other interactive systems.\\n\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"2. Increased efficiency: Low latency LLMs can process large amounts of data more quickly\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\", leading to increased efficiency and throughput. This is particularly important for applications that require\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" real-time processing of large amounts of data, such as natural language processing, machine translation,\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" and speech recognition.\\n3. Enhanced accuracy: Low latency LLMs can also improve the\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" accuracy of their predictions and responses by taking into account the most recent user inputs\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\". This is because they are able to process and respond to user inputs more quickly, reducing the\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" likelihood of errors or\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" outdated information.\\n4. Better integration with other systems: Low latency LLMs can more\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" easily integrate with other systems and services, as they are able to quickly process and respond to requests.\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" This is particularly important for applications that require real-time communication with other systems, such as databases\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\", APIs, and other microservices.\\n5. Improved scalability: Low latency\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" LLMs can also be more easily scaled to handle large numbers of concurrent\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" users or requests. This is because they are able to quickly process and respond to requests, reducing the\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" amount of time and resources required to handle each request.\\n\\nOverall, low\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" latency LLMs are important for a variety of applications due to their ability to quickly process and respond to user\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" inputs, leading to improved user experience, increased efficiency, enhanced accuracy, better integration with other systems, and improved\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" scalability.\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"b2688e4a-234d-41c2-b861-07319a0b93c1\",\"object\":\"chat.completion.chunk\",\"created\":1711496973,\"model\":\"accounts/fireworks/models/mixtral-8x7b-instruct\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":18,\"total_tokens\":420,\"completion_tokens\":402}}\n\ndata: [DONE]\n\n"