{"id":18929,"date":"2023-11-06T09:42:35","date_gmt":"2023-11-06T09:42:35","guid":{"rendered":"https:\/\/techpearl.com\/?p=18929"},"modified":"2024-06-26T09:16:11","modified_gmt":"2024-06-26T09:16:11","slug":"developing-conversational-ai-applications-by-harnessing-the-power-of-llms","status":"publish","type":"post","link":"https:\/\/techpearl.com\/1719395789229\/developing-conversational-ai-applications-by-harnessing-the-power-of-llms\/","title":{"rendered":"Developing Conversational AI applications by harnessing the power of LLMs"},"content":{"rendered":"\t\t<div data-elementor-type=\"wp-post\" data-elementor-id=\"18929\" class=\"elementor elementor-18929\" data-elementor-settings=\"[]\">\n\t\t\t\t\t\t\t<div class=\"elementor-section-wrap\">\n\t\t\t\t\t\t\t<section class=\"has_ma_el_bg_slider elementor-section elementor-top-section elementor-element elementor-element-5cdabc4 elementor-section-boxed elementor-section-height-default elementor-section-height-default jltma-glass-effect-no\" data-id=\"5cdabc4\" data-element_type=\"section\" data-settings=\"{&quot;_ha_eqh_enable&quot;:false}\">\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"has_ma_el_bg_slider elementor-column elementor-col-100 elementor-top-column elementor-element elementor-element-1f796f2f jltma-glass-effect-no\" data-id=\"1f796f2f\" data-element_type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t\t\t<div class=\"elementor-element elementor-element-3ba6d100 jltma-glass-effect-no elementor-widget elementor-widget-theme-post-title elementor-page-title elementor-widget-heading\" data-id=\"3ba6d100\" data-element_type=\"widget\" data-widget_type=\"theme-post-title.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t<h1 class=\"elementor-heading-title elementor-size-default\">Developing Conversational AI applications by harnessing the power of 
LLMs<\/h1>\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-3915d592 elementor-author-box--align-left elementor-author-box--image-valign-top elementor-author-box--name-yes elementor-author-box--biography-yes elementor-author-box--link-no jltma-glass-effect-no elementor-widget elementor-widget-author-box\" data-id=\"3915d592\" data-element_type=\"widget\" data-widget_type=\"author-box.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t<div class=\"elementor-author-box\">\n\t\t\t\n\t\t\t<div class=\"elementor-author-box__text\">\n\t\t\t\t\t\t\t\t\t<div >\n\t\t\t\t\t\t<h4 class=\"elementor-author-box__name\">Sreekanth Reddy<\/h4>\t\t\t\t\t<\/div>\n\t\t\t\t\n\t\t\t\t\t\t\t\t\t<div class=\"elementor-author-box__bio\">\n\t\t\t\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t\n\t\t\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-360fa5fe bdt-ss-btns-view-icon bdt-ss-btns-shape-rounded bdt-ss-btns-align-left bdt-ep-grid-0 bdt-ss-btns-style-flat bdt-ss-btns-color-original jltma-glass-effect-no elementor-widget elementor-widget-bdt-social-share\" data-id=\"360fa5fe\" data-element_type=\"widget\" data-widget_type=\"bdt-social-share.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t<div class=\"bdt-social-share bdt-ep-grid\">\n\t\t\t\t\t\t\t<div class=\"bdt-social-share-item bdt-ep-grid-item\">\n\t\t\t\t\t<div class=\"bdt-ss-btn bdt-ss-linkedin\" data-social=\"linkedin\">\n\t\t\t\t\t\t\t\t\t\t\t\t\t<span class=\"bdt-ss-icon\">\n\t\t\t\t\t\t\t\t<i class=\"ep-linkedin\"><\/i>\n\t\t\t\t\t\t\t<\/span>\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t\t\t\t<div class=\"bdt-social-share-item bdt-ep-grid-item\">\n\t\t\t\t\t<div class=\"bdt-ss-btn bdt-ss-twitter\" data-social=\"twitter\">\n\t\t\t\t\t\t\t\t\t\t\t\t\t<span class=\"bdt-ss-icon\">\n\t\t\t\t\t\t\t\t<i 
class=\"ep-twitter\"><\/i>\n\t\t\t\t\t\t\t<\/span>\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t\t\t\t<div class=\"bdt-social-share-item bdt-ep-grid-item\">\n\t\t\t\t\t<div class=\"bdt-ss-btn bdt-ss-facebook\" data-social=\"facebook\">\n\t\t\t\t\t\t\t\t\t\t\t\t\t<span class=\"bdt-ss-icon\">\n\t\t\t\t\t\t\t\t<i class=\"ep-facebook\"><\/i>\n\t\t\t\t\t\t\t<\/span>\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t\t<\/div>\n\n\t\t\n\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<section class=\"has_ma_el_bg_slider elementor-section elementor-top-section elementor-element elementor-element-13fb7c95 elementor-section-boxed elementor-section-height-default elementor-section-height-default jltma-glass-effect-no\" data-id=\"13fb7c95\" data-element_type=\"section\" data-settings=\"{&quot;_ha_eqh_enable&quot;:false}\">\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"has_ma_el_bg_slider elementor-column elementor-col-100 elementor-top-column elementor-element elementor-element-4a233168 jltma-glass-effect-no\" data-id=\"4a233168\" data-element_type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t\t\t<div class=\"elementor-element elementor-element-10b2d192 jltma-glass-effect-no elementor-widget elementor-widget-theme-post-featured-image elementor-widget-image\" data-id=\"10b2d192\" data-element_type=\"widget\" data-widget_type=\"theme-post-featured-image.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<img loading=\"lazy\" decoding=\"async\" width=\"550\" height=\"407\" src=\"https:\/\/techpearl.com\/1719395789229\/wp-content\/uploads\/2023\/11\/Developing-Conversational-AI-applications-by-harnessing-the-power-of-LLMs-.jpg\" class=\"attachment-full size-full\" alt=\"Developing Conversational AI applications by 
harnessing the power of LLMs\" srcset=\"https:\/\/techpearl.com\/1719395789229\/wp-content\/uploads\/2023\/11\/Developing-Conversational-AI-applications-by-harnessing-the-power-of-LLMs-.jpg 550w, https:\/\/techpearl.com\/1719395789229\/wp-content\/uploads\/2023\/11\/Developing-Conversational-AI-applications-by-harnessing-the-power-of-LLMs--300x222.jpg 300w\" sizes=\"auto, (max-width: 550px) 100vw, 550px\" \/>\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<section class=\"has_ma_el_bg_slider elementor-section elementor-top-section elementor-element elementor-element-394ff0e3 elementor-section-boxed elementor-section-height-default elementor-section-height-default jltma-glass-effect-no\" data-id=\"394ff0e3\" data-element_type=\"section\" data-settings=\"{&quot;_ha_eqh_enable&quot;:false}\">\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"has_ma_el_bg_slider elementor-column elementor-col-100 elementor-top-column elementor-element elementor-element-417511dc jltma-glass-effect-no\" data-id=\"417511dc\" data-element_type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t\t\t<div class=\"elementor-element elementor-element-2e42d32c jltma-glass-effect-no elementor-widget elementor-widget-heading\" data-id=\"2e42d32c\" data-element_type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">Introduction<\/h2>\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-6f1713fb jltma-glass-effect-no elementor-widget elementor-widget-text-editor\" data-id=\"6f1713fb\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<p>The power of conversational AI has become a 
transformational force in a world where technology continues to alter how people live, work, and communicate. Imagine a computer being that can converse with us in a human-like manner, comprehend our wants, and provide insightful answers. In this blog, we embark on a journey into this transformative field by sharing our experience in developing a chatbot that seamlessly combines the power of Langchain with the natural language processing abilities of OpenAI&#8217;s GPT-3.5.<\/p><p>Chatbots are quite useful for addressing client inquiries regarding the company or the website. We can utilize the capabilities of large language models like GPT-3.5 to offer replies based on the data we provide.<\/p>\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-1f744847 jltma-glass-effect-no elementor-widget elementor-widget-heading\" data-id=\"1f744847\" data-element_type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">Large Language Model (LLM)\n<\/h2>\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-396c37c9 jltma-glass-effect-no elementor-widget elementor-widget-text-editor\" data-id=\"396c37c9\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<p>A large language model (LLM) is an advanced AI system that has been trained on large amounts of text data and is capable of understanding and generating human-like text. 
LLMs are based on the transformer-based architecture, which was first introduced in the paper &#8220;<a href=\"https:\/\/arxiv.org\/pdf\/1706.03762.pdf\" target=\"_blank\" rel=\"noopener\">Attention is all you need<\/a>&#8220;.<\/p><figure id=\"attachment_18964\" aria-describedby=\"caption-attachment-18964\" style=\"width: 429px\" class=\"wp-caption aligncenter\"><img loading=\"lazy\" decoding=\"async\" class=\"wp-image-18964 \" title=\"Attention\" src=\"https:\/\/techpearl.com\/1645704443043\/wp-content\/uploads\/2023\/11\/Attention-695x1024.png\" alt=\"Attention\" width=\"429\" height=\"633\" \/><figcaption id=\"caption-attachment-18964\" class=\"wp-caption-text\">\u00a0<\/figcaption><\/figure><center>The Transformer &#8211; model architecture (from the paper \u201cattention is all you need\u201d).<\/center><p><br \/>LLMs perform so well in responding to user inquiries in a friendly and understandable manner because they are capable of natural language processing and have been well trained on a large corpus of data.<\/p>\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-44b1e270 jltma-glass-effect-no elementor-widget elementor-widget-heading\" data-id=\"44b1e270\" data-element_type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">Exploring popular LLMs<\/h2>\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-6b0ae19f jltma-glass-effect-no elementor-widget elementor-widget-text-editor\" data-id=\"6b0ae19f\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<p>Here is a list of a few popular LLMs.<\/p><ol><li style=\"font-weight: 400;\" aria-level=\"1\"><span style=\"font-weight: 400;\">GPT-4<\/span><\/li><li style=\"font-weight: 400;\" aria-level=\"1\"><span style=\"font-weight: 
400;\">GPT-3.5<\/span><\/li><li style=\"font-weight: 400;\" aria-level=\"1\"><span style=\"font-weight: 400;\">BERT<\/span><\/li><li style=\"font-weight: 400;\" aria-level=\"1\"><span style=\"font-weight: 400;\">Cohere<\/span><\/li><li style=\"font-weight: 400;\" aria-level=\"1\"><span style=\"font-weight: 400;\">Anthropic<\/span><\/li><li style=\"font-weight: 400;\" aria-level=\"1\"><span style=\"font-weight: 400;\">LaMDA, etc.<\/span><\/li><\/ol><p><span style=\"font-weight: 400;\">GPT-4 surpasses all of these models, but it takes a little longer to respond. Instead, we may utilize GPT-3.5, which is the fastest model available from OpenAI and has been trained using 175 billion parameters. It is also great for engaging in conversation.<\/span><\/p>\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-5cc8813f jltma-glass-effect-no elementor-widget elementor-widget-heading\" data-id=\"5cc8813f\" data-element_type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">Training Approaches<\/h2>\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-f6cc628 jltma-glass-effect-no elementor-widget elementor-widget-text-editor\" data-id=\"f6cc628\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<p><span style=\"font-weight: 400;\">While the language model we chose is conversant, it cannot respond to queries on our private data and documents. 
To do this, we must train the model to deliver responses based on the data we provide.\u00a0<\/span><\/p><p><span style=\"font-weight: 400;\">We talk about two approaches we can take to get the LLM to respond to queries on our data.<\/span><\/p><ol><li style=\"font-weight: 400;\" aria-level=\"1\"><span style=\"font-weight: 400;\">Fine tuning<\/span><\/li><li style=\"font-weight: 400;\" aria-level=\"1\"><span style=\"font-weight: 400;\">Retrieval-augmented generation (RAG)<\/span><\/li><\/ol>\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-4939be40 jltma-glass-effect-no elementor-widget elementor-widget-heading\" data-id=\"4939be40\" data-element_type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">Fine Tuning\n<\/h2>\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-398cf181 jltma-glass-effect-no elementor-widget elementor-widget-text-editor\" data-id=\"398cf181\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<p>A model can be &#8220;fine-tuned&#8221; by changing its parameters in order to perform better at a particular job. That is, as we feed the data to the model, it fine-tunes its parameters by adjusting them to generate the response based on the data we feed it.<\/p><p>Training data must be a JSONL document, with each line containing a prompt-completion pair matching a training example. 
See the sample below for a general idea of what training data should look like.<\/p>\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-5f69e70f jltma-glass-effect-no elementor-widget elementor-widget-text-editor\" data-id=\"5f69e70f\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<p>Template: {&#8220;prompt&#8221;: &#8220;&lt;prompt text or question&gt;&#8221;, &#8220;completion&#8221;: &#8220;&lt;response for prompt&gt;&#8221;}<\/p>\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-577ded0c jltma-glass-effect-no elementor-widget elementor-widget-text-editor\" data-id=\"577ded0c\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<p>Example: {&#8220;prompt&#8221;: &#8220;what is 1+1&#8221;, &#8220;completion&#8221;: &#8220;2&#8221;}.<\/p>\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-4eff146b jltma-glass-effect-no elementor-widget elementor-widget-heading\" data-id=\"4eff146b\" data-element_type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">Retrieval-augmented generation (RAG)<\/h2>\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-1da0aa5d jltma-glass-effect-no elementor-widget elementor-widget-text-editor\" data-id=\"1da0aa5d\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<p>RAG is a machine learning framework that combines retrieval-based and generation-based approaches to improve the performance of language models in generating coherent and contextually relevant text. 
In the RAG framework, we utilize a retriever to choose relevant documents or passages from a huge corpus of text depending on a specific query or context. And the generator is typically a language model, such as GPT-3.5, that takes the retrieved information and generates human-like text based on that context.<\/p><figure id=\"attachment_18965\" aria-describedby=\"caption-attachment-18965\" style=\"width: 960px\" class=\"wp-caption alignnone\"><img loading=\"lazy\" decoding=\"async\" class=\"size-full wp-image-18965\" title=\"Flow Chart\" src=\"https:\/\/techpearl.com\/1645704443043\/wp-content\/uploads\/2023\/11\/Flow-Chart.png\" alt=\"Flow Chart\" width=\"960\" height=\"540\" \/><figcaption id=\"caption-attachment-18965\" class=\"wp-caption-text\">Flow Chart<\/figcaption><\/figure>\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-25127649 jltma-glass-effect-no elementor-widget elementor-widget-heading\" data-id=\"25127649\" data-element_type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">The power of RAG<\/h2>\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-6f02a970 jltma-glass-effect-no elementor-widget elementor-widget-text-editor\" data-id=\"6f02a970\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<p>Fine-tuning is a time-consuming process that requires a lot of work to process the training data, and even then, it might not work well. It also requires a large amount of training data.<\/p><p>The RAG implementation will always make sure that the model produces a response depending on the current context and the information that has been retrieved. 
This means that the generated text will be more relevant and accurate, as it takes into account the most up-to-date information available. RAG further gives users more control over the generated text by enabling them to provide certain prompts or restrictions that will direct the model&#8217;s output.<\/p><p>The RAG framework requires a series of processes to be completed, and using Langchain for these activities makes the procedure simpler.<\/p>\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-3596ff70 jltma-glass-effect-no elementor-widget elementor-widget-heading\" data-id=\"3596ff70\" data-element_type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">Streamlining with Langchain<\/h2>\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-4487b614 jltma-glass-effect-no elementor-widget elementor-widget-text-editor\" data-id=\"4487b614\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<p>LangChain is a freely accessible toolkit designed for crafting applications driven by language models, accessible in both Python and JavaScript. 
What makes LangChain stand out is its innovative use of chains, allowing the amalgamation of multiple steps into a singular function, thereby reducing the coding effort required from developers.<\/p>\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-341683d2 jltma-glass-effect-no elementor-widget elementor-widget-heading\" data-id=\"341683d2\" data-element_type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">Handling Data<\/h2>\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-3eb5f23e jltma-glass-effect-no elementor-widget elementor-widget-text-editor\" data-id=\"3eb5f23e\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<p>Data can be in a variety of forms. As a first step in the RAG framework, we have to retrieve the text depending on a specific query or context. In order to do this, we must extract textual information from different kinds of documents and store it in a way that is easy to retrieve. We first extract the text data from documents using LangChain document loaders and then store it as objects that each include pageContent (the document&#8217;s content) and document metadata. 
Each object represents a single document.<\/p>\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-2a29fd8a jltma-glass-effect-no elementor-widget elementor-widget-heading\" data-id=\"2a29fd8a\" data-element_type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">Overcoming Token Limitations<\/h2>\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-50417aaa jltma-glass-effect-no elementor-widget elementor-widget-text-editor\" data-id=\"50417aaa\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<p>One of the main limitations of the RAG framework is that LLMs won&#8217;t accept huge amounts of data at once. Let us dive deeper into this topic. GPT-3.5 accepts text data of size up to 4096 tokens. Tokens are the smallest units of text; they can be character, word, or subword units. Look at the below image for a better visual understanding.<\/p><p><img loading=\"lazy\" decoding=\"async\" class=\"alignnone wp-image-18966\" title=\"Tokens example\" src=\"https:\/\/techpearl.com\/1645704443043\/wp-content\/uploads\/2023\/11\/Tokens-example--1024x301.png\" alt=\"Tokens example\" width=\"700\" height=\"206\" \/><\/p><p>The objects we just created include different amounts of text data, so there is a possibility that when we get any object that has information relevant to a given query, it will go over the token limit. For this, we split the text data into smaller objects containing smaller chunks of text data using a few measures that are appropriate for our needs. 
The chunk size is the first thing we need to decide.<\/p>\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-7a0c461a jltma-glass-effect-no elementor-widget elementor-widget-heading\" data-id=\"7a0c461a\" data-element_type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">Optimizing Chunk Sizes<\/h2>\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-575facc2 jltma-glass-effect-no elementor-widget elementor-widget-text-editor\" data-id=\"575facc2\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<p>The chunk size is nothing but the character limit for each object. As was previously mentioned, GPT-3.5&#8217;s maximum token limit is 4,096 tokens. However, as a good rule of thumb, we set a maximum of 2000 tokens for retrieved information and 2000 tokens for user queries and past conversations (more on this later).<\/p><p>Retrieving one text object provides a large amount of information to LLM, but in this case we are missing the diversity of information from various other documents, so we retrieve four different objects for each query to ensure a broader range of information. By retrieving multiple text objects, we can enhance the model&#8217;s understanding and provide a more comprehensive response to user queries. This approach allows GPT-3.5 to consider different perspectives and gather insights from a wider pool of sources, resulting in more accurate and well-rounded answers; accordingly, each chunk should be limited to 500 tokens.<\/p><p>Let&#8217;s assume that we can obtain 400 words or so for every 500 tokens. Additionally, a word may have an average of four characters. We may set the chunk size to 4 x 400 = 1600 or 1500 characters, and the chunk overlap to 160 or 150 characters. 
This indicates that each chunk will share 150 or 160 characters with the chunk before it and the chunk after it. This guarantees that no significant information is omitted and enables a seamless transition between objects in the same document. With this method, we can make sure that LLM receives a variety of data from different documents while still keeping the chunk size modest.<\/p><p>Once we&#8217;ve established an acceptable chunk size, we can&#8217;t just split the text after the first 1500 characters. We must consider the context in order to ensure that the split occurs at a logical and meaningful point in the text. This will help to maintain the data&#8217;s integrity and coherence when it is being processed by LLM. As a result, we employ the RecursiveCharacterTextSplitter provided by langchain, which repeatedly tries to divide the text into manageable pieces using the list of separators in sequence. The default separators are [&#8220;\\n\\n&#8221;, &#8220;\\n&#8221;, &#8220; &#8221;, &#8220;&#8221;]. 
This has the result of attempting to keep all paragraphs (and then sentences, and eventually words) together for as long as possible because those would ordinarily appear to be the strongest semantically connected text fragments.<\/p>\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-3d593740 jltma-glass-effect-no elementor-widget elementor-widget-heading\" data-id=\"3d593740\" data-element_type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">Harnessing Embeddings for Semantic Understanding<\/h2>\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-1959f28b jltma-glass-effect-no elementor-widget elementor-widget-text-editor\" data-id=\"1959f28b\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<p>Embeddings are vector representations of words or phrases that capture their semantic meaning. They are commonly used in natural language processing tasks such as information retrieval, image recognition, recommender systems, and many more. Embeddings can be generated using pre-trained models such as BERT, RoBERTa, OpenAI&#8217;s text-embedding-ada-002 model, and so on. These pre-trained models are trained on large amounts of text data and can capture the contextual meaning of words and sentences. Each word or sentence is represented as a numerical vector, with similar words or sentences clustered together in vector space. 
This allows us to compare semantic similarities between words or sentences and can help us with performing a similarity search and retrieving the most similar documents that are close to the user query.<\/p><p>We use OpenAI&#8217;s text-embedding-ada-002 model for the creation of embeddings.<\/p>\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-6110657a jltma-glass-effect-no elementor-widget elementor-widget-heading\" data-id=\"6110657a\" data-element_type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">Unlocking the Potential of Vector Stores<\/h2>\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-22f5f67b jltma-glass-effect-no elementor-widget elementor-widget-text-editor\" data-id=\"22f5f67b\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<p>Vector stores or vector databases are tools for storing and effectively searching for high-dimensional vectors, like numerical vectors representing words or sentences. These databases are designed to handle large amounts of data and perform fast similarity searches, making them useful for applications like natural language processing and information retrieval. By organizing the vectors in a structured manner, vector databases enable efficient storage and retrieval of similar documents, improving the accuracy and speed of search results.<\/p><p>Many databases, like ChromaDB, Pinecone, and libraries like FAISS, provide features specifically tailored for vector search. These databases and libraries offer indexing techniques that optimize the storage and retrieval of high-dimensional vectors, allowing for efficient similarity searches. 
Additionally, they often include advanced algorithms and models that enable more sophisticated operations such as clustering or nearest neighbor search.<\/p>\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-6056466d jltma-glass-effect-no elementor-widget elementor-widget-heading\" data-id=\"6056466d\" data-element_type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">Exploring FAISS for a Similarity Search\n<\/h2>\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-2ff7f833 jltma-glass-effect-no elementor-widget elementor-widget-text-editor\" data-id=\"2ff7f833\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<p>FAISS, or Facebook AI Similarity Search, is an open-source library developed by Facebook AI Research. FAISS is designed to efficiently handle large-scale vector datasets and provides various indexing methods, including IVF (Inverted File) and HNSW (Hierarchical Navigable Small World). These indexing methods allow for faster search and retrieval of similar vectors, making FAISS suitable for applications such as text retrieval or image search. Additionally, FAISS supports GPU acceleration, enabling even faster computation for similarity search tasks.<\/p><p>The user query or context is converted into an embedding vector using the same model used before. This embedding vector is then compared to the vectors in the dataset using FAISS, which calculates the similarity scores between them. 
The top-k most similar vectors can be retrieved and used to provide context for the LLM model, which can then generate responses or make predictions.<\/p>\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-4445e2d3 jltma-glass-effect-no elementor-widget elementor-widget-heading\" data-id=\"4445e2d3\" data-element_type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">Enhancing conversational memory\n<\/h2>\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-77b27e6b jltma-glass-effect-no elementor-widget elementor-widget-text-editor\" data-id=\"77b27e6b\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<p>Now that the documents have been turned into embedding vectors and are ready to send information to the LLM, we can begin zero-shot prompting. However, LLM lacks memory of previous interactions; thus, we must add memory so that the chatbot can respond based on previous discussions in order to be more conversational. Due to token restrictions, we are unable to transfer the complete memory into the prompt as the dialogue increases, so we employ an effective approach to conveying this information. In order to capture historical context, we combine buffer window memory with summary memory.<\/p><ol><li>BufferWindowMemory: It keeps track of K sets of recent chats. It is beneficial to respond to the follow-up questions.<\/li><li>Summary: It takes the full discussion and generates a summary of it. 
It is helpful to acquire a summary of previous chats that BufferWindowMemory cannot recall.<\/li><\/ol><p><strong>Note:<\/strong> Using summary memory slows down the bot&#8217;s response because it has to summarize and generate the text, so if you need your bot to respond quickly, avoid using summary memory and only use buffer window memory.<\/p>\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-3ce89e12 jltma-glass-effect-no elementor-widget elementor-widget-heading\" data-id=\"3ce89e12\" data-element_type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">Designing effective prompt templates\n<\/h2>\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-20941976 jltma-glass-effect-no elementor-widget elementor-widget-text-editor\" data-id=\"20941976\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<p>A &#8220;prompt&#8221; is a piece of information or a question that is given to the bot as input. It serves as the starting point for the bot&#8217;s response. The prompt can be a specific query, a request for information, or any other type of input that the bot needs to generate a relevant and coherent response. The quality and specificity of the prompt greatly influence the accuracy and relevance of the bot&#8217;s response. Therefore, it is important to provide clear and concise prompts to ensure effective communication with the bot. In our case, the prompt should be constructed so that it must produce an answer from the provided collection of documents; if the answer cannot be found in the documents, it must respond with &#8220;I don&#8217;t know&#8221; or a similar phrase and never with an answer drawn from its own knowledge base. 
We must also provide a memory for the bot so that it can refer back to previous interactions and maintain context in the conversation. Finally, it should receive human input.<\/p><p><strong>Example template:<\/strong><\/p>\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-2c07634a jltma-glass-effect-no elementor-widget elementor-widget-text-editor\" data-id=\"2c07634a\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<p>PROMPT_TEMPLATE:<\/p><p>You&#8217;re an AI assistant having a conversation with a human; the scenario is that you have to respond to the human based solely on the information present in the documents below and queries related to the documents only; if you can&#8217;t find the answer in the provided documents, simply say &#8220;Sorry, I don&#8217;t have information on that&#8221; and don&#8217;t try to make up an answer in any case.<\/p><p><span style=\"font-weight: 400;\">Documents: {context}<\/span><\/p><p><span style=\"font-weight: 400;\">Conversation Summary: {conversation_summary}<\/span><\/p><p><span style=\"font-weight: 400;\">Recent conversation: {recent_conversation}<\/span><\/p><p><span style=\"font-weight: 400;\">\u00a0<\/span><\/p><p><span style=\"font-weight: 400;\">Human input: {question}<\/span><\/p>\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-53932f08 jltma-glass-effect-no elementor-widget elementor-widget-text-editor\" data-id=\"53932f08\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<p>The items enclosed in curly braces are the prompt&#8217;s input variables, which are later replaced with the appropriate text when passed into the chain.<\/p>\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-2d11609 
jltma-glass-effect-no elementor-widget elementor-widget-heading\" data-id=\"2d11609\" data-element_type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">Simplifying Response Generation with Chains\n<\/h2>\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-2abc0959 jltma-glass-effect-no elementor-widget elementor-widget-text-editor\" data-id=\"2abc0959\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<p>The final step is to generate a response based on the input and the documents retrieved. To generate the response, we can use the OpenAI GPT-3.5 model, and for each query, we must retrieve similar documents from the vector store, take the previous conversation from memory, build a prompt, and provide it to the LLM to generate the response. The conversation chain from langchain simplifies the process by automating the retrieval of similar documents, incorporating previous conversation history, and creating a prompt. This allows for a seamless generation of responses using the OpenAI GPT-3.5 model. 
By utilizing this chain, we can efficiently handle multiple queries and provide accurate and relevant responses.<\/p>\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-5adada38 jltma-glass-effect-no elementor-widget elementor-widget-heading\" data-id=\"5adada38\" data-element_type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">Conclusion\n<\/h2>\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-7da6511b jltma-glass-effect-no elementor-widget elementor-widget-text-editor\" data-id=\"7da6511b\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<p>In conclusion, the combination of ChatBot with OpenAI&#8217;s GPT-3.5 constitutes an important turning point in the growth of conversational AI. We&#8217;ve unlocked the potential of chatbots that deliver meaningful and context-aware replies by using large language models (LLMs). We&#8217;ve set the road for more interesting and productive interactions, ultimately redefining conversational AI with the use of embeddings, libraries like FAISS, and powerful prompt templates. Natural language understanding and generation have significantly improved as a result of these developments in conversational AI. Chatbots are increasingly useful tools for a variety of applications, including customer assistance, virtual assistants, and content production.<\/p>\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t","protected":false},"excerpt":{"rendered":"<p>Introduction The power of conversational AI has become a transformational force in a world where technology continues to alter how people live, work, and communicate. 
Imagine a computer being that can converse with us in a human-like manner, comprehend our wants, and provide insightful answers. In this blog, we embark on a journey into this &hellip;<\/p>\n<p class=\"read-more\"> <a class=\"\" href=\"https:\/\/techpearl.com\/1719395789229\/developing-conversational-ai-applications-by-harnessing-the-power-of-llms\/\"> <span class=\"screen-reader-text\">Developing Conversational AI applications by harnessing the power of LLMs<\/span> Read More &raquo;<\/a><\/p>\n","protected":false},"author":19,"featured_media":18931,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[63,21],"tags":[],"class_list":["post-18929","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-blog","category-software-development"],"_links":{"self":[{"href":"https:\/\/techpearl.com\/1719395789229\/wp-json\/wp\/v2\/posts\/18929","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/techpearl.com\/1719395789229\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/techpearl.com\/1719395789229\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/techpearl.com\/1719395789229\/wp-json\/wp\/v2\/users\/19"}],"replies":[{"embeddable":true,"href":"https:\/\/techpearl.com\/1719395789229\/wp-json\/wp\/v2\/comments?post=18929"}],"version-history":[{"count":6,"href":"https:\/\/techpearl.com\/1719395789229\/wp-json\/wp\/v2\/posts\/18929\/revisions"}],"predecessor-version":[{"id":19265,"href":"https:\/\/techpearl.com\/1719395789229\/wp-json\/wp\/v2\/posts\/18929\/revisions\/19265"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/techpearl.com\/1719395789229\/wp-json\/wp\/v2\/media\/18931"}],"wp:attachment":[{"href":"https:\/\/techpearl.com\/1719395789229\/wp-json\/wp\/v2\/media?parent=18929"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/techpearl.com\/1719395789229\/wp-json\/wp\/v2\/
categories?post=18929"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/techpearl.com\/1719395789229\/wp-json\/wp\/v2\/tags?post=18929"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}