Chat (Mistral AI - open-mixtral-8x7b)

POST https://api.mistral.ai/v1/chat/completions

Request Body

{"model"=>"open-mixtral-8x7b", "messages"=>[{"role"=>"user", "content"=>"Explain the importance of low latency LLMs"}], "temperature"=>0.5, "max_tokens"=>1024, "top_p"=>1, "stream"=>true}

RESPONSES

status: OK

"data: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"content\":\"\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\"Low latency large language\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\" models (LLMs) are important in the field of\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\" artificial intelligence and natural language processing (NLP)\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\" due to their ability to quickly process and generate human-\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\"like text in real-time or near real-time\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\" conversations.\\n\\nHere are some of the reasons\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\" why low latency is important for LLMs:\\n\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\"\\n1. Improved user experience: Low latency\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\" LLMs can provide a more seamless and respons\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\"ive\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\" user experience in real-time conversations, such\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\" as\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\" chatbots,\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\" virtual assistants, and other NLP-based\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\" applications\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\". Users expect quick and accurate responses, and low\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\" latency LLMs can help meet these expectations.\\n\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\"2. Better performance in time-sensitive applications\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\": Low latency LLMs are essential in time-\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\"sensitive applications, such as real-time translation\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\", live captioning, and speech-to-\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\"text conversion. In these applications, delays in processing\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\" and generating text can result in significant consequences, such\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\" as lost revenue or poor user experience.\\n3.\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\" Enhanced efficiency: Low latency LLMs\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\" can process and generate text more efficiently, reducing the amount\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\" of computational resources required to perform NLP tasks\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\".\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\" This can lead to cost savings and improved scalability\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\" in large-scale NLP applications.\\n4.\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\" Competitive advantage: Low latency LLMs can\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\" provide a competitive advantage in industries where real-time N\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\"LP applications are critical, such as finance, healthcare\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\", and customer service. Faster and more accurate N\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\"LP processing can lead to better decision-making,\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\" improved patient outcomes, and higher customer satisfaction.\\n\\n\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\"In summary, low latency LLMs are important\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\" for improving user experience, performance in time-sensitive\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\" applications, efficiency, and providing a competitive advantage in\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\" various industries. As NLP applications continue to grow in\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\" popularity and complexity, low latency LLMs will\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\" become increasingly important for meeting the demands of real-time\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\" NLP processing.\"},\"finish_reason\":null}],\"usage\":null}\n\ndata: {\"id\":\"25430aa7c3b64746baf46c2ed8c8e6b7\",\"object\":\"chat.completion.chunk\",\"created\":1711929876,\"model\":\"open-mixtral-8x7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":null,\"content\":\"\"},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":18,\"total_tokens\":412,\"completion_tokens\":394}}\n\ndata: [DONE]\n\n"