[{"data":1,"prerenderedAt":35122},["ShallowReactive",2],{"\u002Fnews\u002Fnewsletter-2025-10-15-app":3,"\u002Fnews\u002Fnewsletter-2025-10-15-news":41,"related-candidates":57,"surr-\u002Fnews\u002Fnewsletter-2025-10-15":35119},{"id":4,"title":5,"author":6,"body":12,"description":26,"extension":27,"meta":28,"navigation":35,"path":36,"seo":37,"sitemap":38,"stem":39,"__hash__":40},"content\u002F9.news\u002Fnewsletter-2025-10-15.md","The next Transformer moment for AI - read in Forbes",{"id":7,"url":8,"name":9,"img":10,"provider":11},"pathway","pathway-team","Pathway Team","\u002Fassets\u002Fpictures\u002Fimage_pathway_team.png","s3",{"type":13,"value":14,"toc":22},"minimark",[15],[16,17],"iframe",{"src":18,"width":19,"height":19,"className":20},"https:\u002F\u002Fmailchi.mp\u002Fpathway\u002Fthe-next-transformer-moment-for-ai","100%",[21],"h-screen",{"title":23,"searchDepth":24,"depth":24,"links":25},"",2,[],"Our groundbreaking post-transformer BDH architecture has launched","md",{"thumbnail":29,"date":31,"tags":32,"aside":34},{"src":30,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fpathway-newsletter-th.png","2025-10-15",[33],"newsletter",false,true,"\u002Fnews\u002Fnewsletter-2025-10-15",{"title":5,"description":26},{"loc":36},"9.news\u002Fnewsletter-2025-10-15","FvzbKpFNpRcNixzjgKAD8FqaafZHNm9ggVGayPfnFGg",{"id":42,"title":5,"author":43,"body":44,"date":31,"description":26,"extension":27,"hidden":34,"keywords":51,"meta":52,"navigation":35,"path":36,"seo":53,"stem":39,"tags":54,"thumbnail":55,"__hash__":56},"news\u002F9.news\u002Fnewsletter-2025-10-15.md",{"id":7,"url":8,"name":9,"img":10,"provider":11},{"type":13,"value":45,"toc":49},[46],[16,47],{"src":18,"width":19,"height":19,"className":48},[21],{"title":23,"searchDepth":24,"depth":24,"links":50},[],null,{"aside":34},{"title":5,"description":26},[33],{"src":30,"provider":11},"zftK3X0Uz8TCosKnBn9PrQaglmXQLrjUI0NU2MVIe-M",[58,101,244,276,308,342,373,404,431,459,490,522,554,585,639,728,757,817,849,934,
965,994,1026,1060,1093,1166,1194,1225,1256,1315,1363,1394,1425,1455,1485,1517,1549,1581,1612,1636,1664,1696,1726,1754,1785,1816,1846,1877,1908,1939,1970,1999,2021,2035,2057,2087,2118,2149,2179,2208,2238,2268,2298,2328,2359,2390,2419,2450,2478,2507,2537,2568,2670,2701,2732,2761,2792,2823,2854,2882,2928,3091,5066,6277,7338,8939,15683,17831,19178,19520,20611,20865,20925,22717,23040,23801,23886,29505,29570,35089],{"id":59,"title":60,"author":61,"body":65,"description":88,"extension":27,"meta":89,"navigation":35,"path":96,"seo":97,"sitemap":98,"stem":99,"__hash__":100},"content\u002F9.news\u002F100-women-in-tech-2025.md","100 Women in Tech",{"name":62,"description":23,"website":63,"img":64,"provider":11},"sifted.eu","https:\u002F\u002Fsifted.eu\u002F","\u002Fassets\u002Fblog\u002Favatars\u002Fsifted-av.png",{"type":13,"value":66,"toc":86},[67,72,83],[68,69,71],"h1",{"id":70},"taking-you-to-an-external-site","Taking you to an external site",[73,74,75,76,82],"p",{},"You will be taken to ",[77,78,79],"a",{"href":79,"rel":80},"https:\u002F\u002Fsifted.eu\u002Flist\u002F100-women-in-tech-2025",[81],"nofollow"," in a moment.",[84,85],"redirect",{"url":79},{"title":23,"searchDepth":24,"depth":24,"links":87},[],"Spotlighting 100 women who are making a real impact and shaping the future of Europe's tech ecosystem",{"layout":90,"redirection":35,"thumbnail":91,"tags":93,"date":95},"blog",{"src":92,"contain":35},"https:\u002F\u002Fwww.datocms-assets.com\u002F60124\u002F1758796746-copy-of-nominate-a-female-rising-star-1.png",[94],"news","2025-10-10","\u002Fnews\u002F100-women-in-tech-2025",{"title":60,"description":88},{"loc":96},"9.news\u002F100-women-in-tech-2025","75jFMS8DWTMmQo3XArzmGjst8CZNtgzml0f8jLPlOoY",{"id":102,"title":103,"author":104,"body":111,"description":232,"extension":27,"meta":233,"navigation":35,"path":239,"seo":240,"sitemap":241,"stem":242,"__hash__":243},"content\u002F9.news\u002F845.la-poste-optimizes-colissimo-flows-in-real-time.md","La Poste Optimizes 
Colissimo Flows in Real Time - Modern Data Stack Recording available",{"id":105,"url":106,"name":107,"description":108,"img":109,"provider":11,"linkedin":110},"claire","claire-nouet","Claire Nouet","COO","\u002Fassets\u002Fauthors\u002Fclaire-nouet.jpg","https:\u002F\u002Fwww.linkedin.com\u002Fin\u002Fclairenouet\u002F",{"type":13,"value":112,"toc":227},[113,116,125,129,139,144,160,164,185,189],[73,114,115],{},"La Poste Group is using Pathway Live Data Framework real-time data processing capabilities to optimize the flows of its well-known Colissimo business line. Jean-Paul Fabre, the Head of Technological innovation at La Poste, and Claire Nouet, the co-founder of Pathway, discussed the collaboration during the Modern Data Stack summit in Paris.",[73,117,118,119,124],{},"Learn how ",[77,120,123],{"href":121,"rel":122},"https:\u002F\u002Fwww.linkedin.com\u002Fcompany\u002Fla-poste-groupe\u002F",[81],"La Poste Group"," deployed ‘operational speed’ AI to forecast more accurately, assess disruption and automate key processes, improving operations in a meaningful way! 
(The video is in French, recap below!)",[126,127],"video-player",{"src":128},"https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=RyZFWeADJXM",[130,131,136],"pathway-button",{"className":132,"href":134,"icon":135},[133],"mx-auto","\u002Fsuccess-stories\u002Fla-poste","heroicons:check-badge-20-solid",[73,137,138],{},"Read more about the case study",[140,141,143],"h2",{"id":142},"challenges-faced-by-la-poste","Challenges Faced by La Poste",[145,146,147,151,154,157],"ul",{},[148,149,150],"li",{},"La Poste handles a high volume of packages across 17 industrial platforms, has more than 400 truck movements daily, and 16 million+ unused data points.",[148,152,153],{},"Need to provide platform operators with real-time information on truck arrival times, origins, and destinations.",[148,155,156],{},"Requirement to improve efficiency to avoid congestion and incidents.",[148,158,159],{},"Need to identify anomalies in real-time",[140,161,163],{"id":162},"key-objectives-and-benefits","Key Objectives and Benefits",[145,165,166,173,179],{},[148,167,168,172],{},[169,170,171],"strong",{},"Reduce costs",": Pathway Live Data Framework is more cost-effective than existing outsourced solutions, with savings reinvested in functional improvements.",[148,174,175,178],{},[169,176,177],{},"Leverage data",": La Poste generates approximately 16 million geolocation points annually but was not fully utilizing this data. Pathway helps make sense of this data and turn it into actionable insights.",[148,180,181,184],{},[169,182,183],{},"Simplify infrastructure",": Pathway Live Data Framework enables easier integration of different acquisition platforms and sensors and facilitates predictive calculations and rapid prototyping.",[140,186,188],{"id":187},"how-pathway-live-data-framework-works","How Pathway Live Data Framework Works",[145,190,191,197,203,209,215,221],{},[148,192,193,196],{},[169,194,195],{},"Network Identification",": Pathway Live Data Framework identifies nodes (e.g. 
locations where trucks stop for a significant time) within the network. It differentiates between relevant nodes (e.g., platforms) and irrelevant ones (e.g., driver rest stops).",[148,198,199,202],{},[169,200,201],{},"Route Analysis",": Pathway Live Data Framework determines the primary routes between platforms and identifies alternative routes. This helps La Poste understand if drivers are using preferred (e.g., tolled) routes or opting for alternative routes.",[148,204,205,208],{},[169,206,207],{},"Data Integration",": Pathway Live Data Framework concentrates real-time geolocation data and historical data on a single platform. This creates a digital twin of the network, which can be used for real-time monitoring and analysis.",[148,210,211,214],{},[169,212,213],{},"Real-time data processing",": Data scientists can work with real-time data in Jupyter Notebooks, and code can be moved directly into production, improving productivity.",[148,216,217,220],{},[169,218,219],{},"Anomaly Detection",": Uses machine learning to detect anomalies with security implications.",[148,222,223,226],{},[169,224,225],{},"GPS Data Enhancement",": Pathway Live Data Framework automatically creates polygons based on GPS quality to filter out errors and false positives caused by signal fluctuations and metallic buildings.",{"title":23,"searchDepth":24,"depth":24,"links":228},[229,230,231],{"id":142,"depth":24,"text":143},{"id":162,"depth":24,"text":163},{"id":187,"depth":24,"text":188},"La Poste Group is using Pathway Live Data Framework real-time data processing capabilities to optimize the flows of its well-known Colissimo business line. 
Jean-Paul Fabre, the Head of Technological innovation at La Poste, and Claire Nouet, the co-founder of Pathway, discussed the collaboration during the Modern Data Stack summit in Paris",{"layout":90,"thumbnail":234,"tags":236,"date":238,"hidden":35},{"src":235},"https:\u002F\u002Fi3.ytimg.com\u002Fvi\u002FRyZFWeADJXM\u002Fmaxresdefault.jpg",[94,237],"video","2025-03-04","\u002Fnews\u002Fla-poste-optimizes-colissimo-flows-in-real-time",{"title":103,"description":232},{"loc":239},"9.news\u002F845.la-poste-optimizes-colissimo-flows-in-real-time","CEAgjtWmJj9l_NW7y66S3kWJGJb3U74djnlA6xfsPV0",{"id":245,"title":246,"author":247,"body":251,"description":265,"extension":27,"meta":266,"navigation":35,"path":271,"seo":272,"sitemap":273,"stem":274,"__hash__":275},"content\u002F9.news\u002F846.becoming-ai-savvy-for-transformation.md","Becoming AI-savvy: going beyond data smarts for business transformation",{"name":248,"img":249,"website":250},"TechInformed","https:\u002F\u002Fwww.google.com\u002Fs2\u002Ffavicons?domain=techinformed.com&sz=128","https:\u002F\u002Ftechinformed.com",{"type":13,"value":252,"toc":263},[253,255,261],[68,254,71],{"id":70},[73,256,75,257,82],{},[77,258,259],{"href":259,"rel":260},"https:\u002F\u002Ftechinformed.com\u002Fbecoming-ai-savvy-for-transformation\u002F",[81],[84,262],{"url":259},{"title":23,"searchDepth":24,"depth":24,"links":264},[],"Claire Nouet, COO and Co-founder of Pathway, outlines how business leaders need to look beyond data smarts to become 
AI-savvy",{"layout":90,"redirection":35,"thumbnail":267,"tags":269,"date":270,"hidden":35},{"src":268},"https:\u002F\u002Fi0.wp.com\u002Ftechinformed.com\u002Fwp-content\u002Fuploads\u002F2024\u002F11\u002FFirefly-a-man-organising-data-its-lit-up-and-he-is-using-his-fingers-in-the-air-in-an-office-the-1.jpg?fit=2688%2C1536&ssl=1",[94],"2025-02-27","\u002Fnews\u002Fbecoming-ai-savvy-for-transformation",{"title":246,"description":265},{"loc":271},"9.news\u002F846.becoming-ai-savvy-for-transformation","gWDcy0MGEksx-k_cWfdJOMrk0ZHHAJvBCcMkfqfrz4U",{"id":277,"title":278,"author":279,"body":284,"description":23,"extension":27,"meta":298,"navigation":35,"path":303,"seo":304,"sitemap":305,"stem":306,"__hash__":307},"content\u002F9.news\u002F847.pathway-mentioned-in-the-financial-times.md","Forbes: Pathway Navigates Next Road For AI Foundational Models",{"name":280,"description":281,"img":282,"provider":11,"website":283},"Forbes","Adrian Bridgwater - Senior Contributor","\u002Fassets\u002Fblog\u002Favatars\u002Fforbes-av.png","https:\u002F\u002Fwww.forbes.com\u002F",{"type":13,"value":285,"toc":296},[286,288,294],[68,287,71],{"id":70},[73,289,75,290,82],{},[77,291,292],{"href":292,"rel":293},"https:\u002F\u002Fwww.forbes.com\u002Fsites\u002Fadrianbridgwater\u002F2025\u002F02\u002F13\u002Fpathway-navigates-next-road-for-ai-foundational-models\u002F",[81],[84,295],{"url":292},{"title":23,"searchDepth":24,"depth":24,"links":297},[],{"redirection":35,"thumbnail":299,"tags":301,"date":302},{"src":300},"https:\u002F\u002Fimageio.forbes.com\u002Fspecials-images\u002Fimageserve\u002F67ac8673cfa548308522a6f4\u002FPark-System-In-Pennsylvania-Town\u002F960x0.jpg?format=jpg&width=1440",[94],"2025-02-13","\u002Fnews\u002Fpathway-mentioned-in-the-financial-times",{"title":278,"description":23},{"loc":303},"9.news\u002F847.pathway-mentioned-in-the-financial-times","O17X0F02Q9ww1Jsw8ymcbpiX4hRncW2Bn7YidYvKhJE",{"id":309,"title":310,"author":311,"body":318,"description":23,"extension":27,"me
ta":332,"navigation":35,"path":337,"seo":338,"sitemap":339,"stem":340,"__hash__":341},"content\u002F9.news\u002F848.pathway-ceo-predicts-2025-ai-trends.md","Pathway CEO and co-founder predicts 2025 AI trends: Will your startup survive the shift?",{"id":312,"url":313,"name":314,"description":315,"img":316,"provider":11,"linkedin":317},"zuzanna","zuzanna-stamirowska","Zuzanna Stamirowska","CEO","\u002Fassets\u002Fauthors\u002Fzuzanna-stamirowska.png","https:\u002F\u002Fwww.linkedin.com\u002Fin\u002Fstamirowska\u002F",{"type":13,"value":319,"toc":330},[320,322,328],[68,321,71],{"id":70},[73,323,75,324,82],{},[77,325,326],{"href":326,"rel":327},"https:\u002F\u002Ftechfundingnews.com\u002Fpathway-ceo-and-co-founder-predicts-2025-ai-trends-will-your-startup-survive-the-shift\u002F",[81],[84,329],{"url":326},{"title":23,"searchDepth":24,"depth":24,"links":331},[],{"layout":90,"redirection":35,"thumbnail":333,"tags":335,"date":336},{"src":334},"https:\u002F\u002Ftechfundingnews.com\u002Fwp-content\u002Fuploads\u002F2024\u002F11\u002Fpathway.jpg",[94],"2024-12-19","\u002Fnews\u002Fpathway-ceo-predicts-2025-ai-trends",{"title":310,"description":23},{"loc":337},"9.news\u002F848.pathway-ceo-predicts-2025-ai-trends","a_lg9ZKqG4a3pL0gWgxXDnlacNe-76lzdwULUYI6EjA",{"id":343,"title":344,"author":345,"body":349,"description":23,"extension":27,"meta":363,"navigation":35,"path":368,"seo":369,"sitemap":370,"stem":371,"__hash__":372},"content\u002F9.news\u002F849.pathway-featured-maddyness-insights-and-predictions.md","Pathway featured in Maddyness 2025 Insights and 
Predictions",{"name":346,"img":347,"provider":11,"website":348},"Maddyness","\u002Fassets\u002Fblog\u002Favatars\u002Fmaddyness-avatar.png","https:\u002F\u002Fwww.maddyness.com\u002F",{"type":13,"value":350,"toc":361},[351,353,359],[68,352,71],{"id":70},[73,354,75,355,82],{},[77,356,357],{"href":357,"rel":358},"https:\u002F\u002Fwww.maddyness.com\u002Fuk\u002F2024\u002F12\u002F20\u002Fprompts-and-predictions-part-2-startup-founders-share-their-insights-and-ambitions-for-2025\u002F",[81],[84,360],{"url":357},{"title":23,"searchDepth":24,"depth":24,"links":362},[],{"layout":90,"redirection":35,"thumbnail":364,"tags":366,"date":367,"hidden":35},{"src":365,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fmaddyness-prediction-th.png",[94],"2024-12-20","\u002Fnews\u002Fpathway-featured-maddyness-insights-and-predictions",{"title":344,"description":23},{"loc":368},"9.news\u002F849.pathway-featured-maddyness-insights-and-predictions","_y_KmTa8LGsKC3IhPjHj9R6LK69CYYoiWdHcBHcrqyc",{"id":374,"title":375,"author":376,"body":380,"description":23,"extension":27,"meta":394,"navigation":35,"path":399,"seo":400,"sitemap":401,"stem":402,"__hash__":403},"content\u002F9.news\u002F850.cnbc-india-spotlighting-pathway.md","CNBC India spotlighting 
Pathway",{"name":377,"description":23,"website":378,"img":379,"provider":11},"cnbctv18","https:\u002F\u002Fwww.cnbctv18.com","\u002Fassets\u002Fblog\u002Favatars\u002Fcnbctv18-av.png",{"type":13,"value":381,"toc":392},[382,384,390],[68,383,71],{"id":70},[73,385,75,386,82],{},[77,387,388],{"href":388,"rel":389},"https:\u002F\u002Fwww.cnbctv18.com\u002Fbusiness\u002Fstartup\u002Fai-startup-pathway-raises-10-million-dollar-seed-funding-19520684.htm",[81],[84,391],{"url":388},{"title":23,"searchDepth":24,"depth":24,"links":393},[],{"layout":90,"redirection":35,"thumbnail":395,"tags":397,"date":398,"hidden":35},{"src":396,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fcnbc-india-spotlighting-pathway-th.jpg",[94],"2024-12-06","\u002Fnews\u002Fcnbc-india-spotlighting-pathway",{"title":375,"description":23},{"loc":399},"9.news\u002F850.cnbc-india-spotlighting-pathway","1xjNVkt5kb5PvGv4WLobrtc3_SkGq2SC75JY_4tswMI",{"id":405,"title":406,"author":407,"body":408,"description":23,"extension":27,"meta":421,"navigation":35,"path":426,"seo":427,"sitemap":428,"stem":429,"__hash__":430},"content\u002F9.news\u002F851.female-founded-pathway-raises-10m-to-power-future-of-live-ai-systems.md","Pathway raises $10 million in seed funding 
round",{"name":377,"description":23,"website":378,"img":379,"provider":11},{"type":13,"value":409,"toc":419},[410,412,417],[68,411,71],{"id":70},[73,413,75,414,82],{},[77,415,388],{"href":388,"rel":416},[81],[84,418],{"url":388},{"title":23,"searchDepth":24,"depth":24,"links":420},[],{"layout":90,"redirection":35,"thumbnail":422,"tags":424,"date":425,"hidden":35},{"src":423},"https:\u002F\u002Fimages.cnbctv18.com\u002Fuploads\u002F2024\u002F06\u002Funtitled-design-12-2024-06-e6878307a9dc2dc2aa80d08efe758942.jpg?impolicy=website&width=640&height=360",[94],"2024-12-02","\u002Fnews\u002Ffemale-founded-pathway-raises-10m-to-power-future-of-live-ai-systems",{"title":406,"description":23},{"loc":426},"9.news\u002F851.female-founded-pathway-raises-10m-to-power-future-of-live-ai-systems","8ku4BkZNWzkv240suHS55ojf0gKHcb_DNW0uxHESOdo",{"id":432,"title":433,"author":434,"body":435,"description":449,"extension":27,"meta":450,"navigation":35,"path":454,"seo":455,"sitemap":456,"stem":457,"__hash__":458},"content\u002F9.news\u002F852.pathway-10m-seed-round-news.md","Parisian AI startup Pathway on moving to the US: 'We need to be in the room where it happens, and it happens in the Bay Area'",{"name":62,"description":23,"website":63,"img":64,"provider":11},{"type":13,"value":436,"toc":447},[437,439,445],[68,438,71],{"id":70},[73,440,75,441,82],{},[77,442,443],{"href":443,"rel":444},"https:\u002F\u002Fsifted.eu\u002Farticles\u002Fpathway-10m-seed-round-news",[81],[84,446],{"url":443},{"title":23,"searchDepth":24,"depth":24,"links":448},[],"The Paris-founded startup has just closed a $10m seed round and is planning to double down on its US 
presence",{"layout":90,"redirection":35,"thumbnail":451,"tags":453,"date":425,"hidden":35},{"src":452,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fpathway-10m-seed-round-news-th.png",[94],"\u002Fnews\u002Fpathway-10m-seed-round-news",{"title":433,"description":449},{"loc":454},"9.news\u002F852.pathway-10m-seed-round-news","JtD4UiK73sDn8tYLvb7StqfmZBJ9eOzRGp21uSGE-hY",{"id":460,"title":461,"author":462,"body":465,"description":479,"extension":27,"meta":480,"navigation":35,"path":485,"seo":486,"sitemap":487,"stem":488,"__hash__":489},"content\u002F9.news\u002F852.pathway-raises-10-million-in-funding-to-advance-the-development-of-live-ai.md","ETCIO Southeast Asia covers Pathway Seed Round",{"name":463,"description":23,"img":464,"provider":11},"ET CIOSEA","\u002Fassets\u002Fblog\u002Favatars\u002Fet-ciosea-av.png",{"type":13,"value":466,"toc":477},[467,469,475],[68,468,71],{"id":70},[73,470,75,471,82],{},[77,472,473],{"href":473,"rel":474},"https:\u002F\u002Fciosea.economictimes.indiatimes.com\u002Famp\u002Fnews\u002Fcorporate\u002Fpathway-raises-10-million-in-funding-to-advance-the-development-of-live-ai\u002F115953873",[81],[84,476],{"url":473},{"title":23,"searchDepth":24,"depth":24,"links":478},[],"The funding will enable the next step in Pathway’s mission to create LiveAI™ systems capable of complex 
reasoning",{"layout":90,"redirection":35,"thumbnail":481,"tags":483,"date":484,"hidden":35},{"src":482,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fet-cio-th.png",[94],"2024-12-04","\u002Fnews\u002Fpathway-raises-10-million-in-funding-to-advance-the-development-of-live-ai",{"title":461,"description":479},{"loc":485},"9.news\u002F852.pathway-raises-10-million-in-funding-to-advance-the-development-of-live-ai","QWhtZvXoC83HxBpz394uPKJDrAs4pkqaV9Nyt1RbgQk",{"id":491,"title":492,"author":493,"body":497,"description":511,"extension":27,"meta":512,"navigation":35,"path":517,"seo":518,"sitemap":519,"stem":520,"__hash__":521},"content\u002F9.news\u002F853.LLM-series-Pathway-Taking-LLMs-out-of-pilot-into-production.md","LLM series - Pathway: Taking LLMs out of pilot into production",{"name":494,"description":23,"website":495,"img":496,"provider":11},"ComputerWeekly","https:\u002F\u002Fwww.computerweekly.com","\u002Fassets\u002Fblog\u002Favatars\u002Fcomputer-weekly-av.png",{"type":13,"value":498,"toc":509},[499,501,507],[68,500,71],{"id":70},[73,502,75,503,82],{},[77,504,505],{"href":505,"rel":506},"https:\u002F\u002Fwww.computerweekly.com\u002Fblog\u002FCW-Developer-Network\u002FLLM-series-Pathway-Taking-LLMs-out-of-pilot-into-production",[81],[84,508],{"url":505},{"title":23,"searchDepth":24,"depth":24,"links":510},[],"Pathway bills itself as the ultimate data processing framework for the AI 
era",{"layout":90,"redirection":35,"thumbnail":513,"tags":515,"date":516,"hidden":35},{"src":514,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fcomputerweekly-th.png",[94],"2024-01-08","\u002Fnews\u002Fllm-series-pathway-taking-llms-out-of-pilot-into-production",{"title":492,"description":511},{"loc":517},"9.news\u002F853.LLM-series-Pathway-Taking-LLMs-out-of-pilot-into-production","e6OqnXu9gI_X0qFCAJZKgHG5ac8Xx9IJM8ZfA-kBLMQ",{"id":523,"title":524,"author":525,"body":529,"description":543,"extension":27,"meta":544,"navigation":35,"path":549,"seo":550,"sitemap":551,"stem":552,"__hash__":553},"content\u002F9.news\u002F854.investors-ceos-founders-chatgpt-journey.md","Industry Leaders Comment On Biggest Lessons From ChatGPT’s Journey So Far",{"name":526,"description":23,"website":527,"img":528,"provider":11},"TechRound","https:\u002F\u002Fwww.techround.com","\u002Fassets\u002Fblog\u002Favatars\u002Ftechround-av.png",{"type":13,"value":530,"toc":541},[531,533,539],[68,532,71],{"id":70},[73,534,75,535,82],{},[77,536,537],{"href":537,"rel":538},"https:\u002F\u002Ftechround.co.uk\u002Fnews\u002Finvestors-ceos-founders-chatgpt-journey\u002F",[81],[84,540],{"url":537},{"title":23,"searchDepth":24,"depth":24,"links":542},[],"CEOs, founders and investors have stepped in to share their reflections, findings and learnings from the AI startup’s journey since its launch in 2022. Running an AI startup isn’t easy, but it can be successful, as we’ve seen with OpenAI. 
Here’s what experts think",{"layout":90,"redirection":35,"thumbnail":545,"tags":547,"date":548,"hidden":35},{"src":546},"https:\u002F\u002Ftechround.co.uk\u002Fwp-content\u002Fuploads\u002Ffly-images\u002F120424\u002Fpramod-tiwari-QPWKc779h2E-unsplash-scaled-e1732897235475-1600x1159.jpg",[94],"2024-11-29","\u002Fnews\u002Finvestors-ceos-founders-chatgpt-journey",{"title":524,"description":543},{"loc":549},"9.news\u002F854.investors-ceos-founders-chatgpt-journey","ksx6oXnZroL4TNNG-euP3qkRKxvMLHT018-znUW3k0c",{"id":555,"title":556,"author":557,"body":561,"description":575,"extension":27,"meta":576,"navigation":35,"path":580,"seo":581,"sitemap":582,"stem":583,"__hash__":584},"content\u002F9.news\u002F855.as-cohere-and-writer-mine-the-live-ai-arena-pathway-joins-the-pack-with-a-10m-round.md","As Cohere and Writer mine the ‘LiveAI™’ arena, Pathway joins the pack with a $10M round",{"name":558,"description":23,"website":559,"img":560,"provider":11},"TechCrunch","https:\u002F\u002Fwww.techcrunch.com","\u002Fassets\u002Fblog\u002Favatars\u002Ftechcrunch-av.png",{"type":13,"value":562,"toc":573},[563,565,571],[68,564,71],{"id":70},[73,566,75,567,82],{},[77,568,569],{"href":569,"rel":570},"https:\u002F\u002Ftechcrunch.com\u002F2024\u002F11\u002F29\u002Fas-cohere-and-writer-mine-the-live-ai-arena-pathway-joins-the-pack-with-a-10m-round\u002F",[81],[84,572],{"url":569},{"title":23,"searchDepth":24,"depth":24,"links":574},[],"Pathway named as one of the French fastest-growing companies in 
2023",{"layout":90,"redirection":35,"thumbnail":577,"tags":579,"date":548},{"src":578,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Ftechcrunch-art-th.png",[94],"\u002Fnews\u002Fas-cohere-and-writer-mine-the-live-ai-arena-pathway-joins-the-pack-with-a-10m-round",{"title":556,"description":575},{"loc":580},"9.news\u002F855.as-cohere-and-writer-mine-the-live-ai-arena-pathway-joins-the-pack-with-a-10m-round","mpIRgm4CNdcJUjHVwqTwySYijUxF5XMOMehbbryKtj0",{"id":586,"title":587,"author":588,"body":589,"description":627,"extension":27,"meta":628,"navigation":35,"path":634,"seo":635,"sitemap":636,"stem":637,"__hash__":638},"content\u002F9.news\u002F856.jsec-pathway-ai-collaboration-steadfast-foxtrot-2024.md","Joint Support and Enabling Command collaborates with AI company Pathway to combine industry and military expertise",{"id":7,"url":8,"name":9,"img":10,"provider":11},{"type":13,"value":590,"toc":625},[591,594,601,607,610,613,616,619,622],[68,592,587],{"id":593},"joint-support-and-enabling-command-collaborates-with-ai-company-pathway-to-combine-industry-and-military-expertise",[73,595,596],{},[597,598],"img",{"alt":599,"src":600},"JSEC, NATO, Allied Command Transformation, Pathway logos","\u002Fassets\u002Fcontent\u002Fblog\u002Fjsec-pathway-banner.png",[73,602,603,606],{},[169,604,605],{},"Ulm, Germany. 
01 OCTOBER 2024"," From 11 to 18 September, more than 250 participants from 24 nations and various NATO entities engaged in one of the largest military enablement exercises at the Joint Support and Enabling Command (JSEC).",[73,608,609],{},"Steadfast Foxtrot 2024 not only trained experts in enablement, reinforcement by forces and sustainment but also set the stage for unveiling NATO’s steps towards the next generation of data processing and simulation systems in close collaboration with the Artificial Intelligence (AI) company Pathway.",[73,611,612],{},"“Robust and innovative data processing technology such as delivered by Pathway, unlocks new capabilities for critical use cases at scale,” emphasizes Major General Gerry Ewart-Brookes, Deputy Chief of Staff Plans. The ability to combine military data sources and open-source information such as civil traffic, social media alerts, and media is crucial for the planning and execution of military operations.",[73,614,615],{},"With its functional demonstrator, the Reinforcement Enablement Simulation Tool (REST), Pathway developed the cornerstone for further development of AI-supported solutions to NATO.",[73,617,618],{},"According to Major General Dirk Kipper, Deputy Chief of Staff Operations, the smart combination of NATO and open source data will speed up situational awareness and bring it to the necessary level, required to successfully operate in the 21st century.",[73,620,621],{},"Exercise Steadfast Foxtrot 2024 tested NATO’s resilience in the face of a greater menace at the Eastern European borders. Military personnel from Allied nations together with NATO staff trained to strengthen mutual cooperation and test the sustainability of NATO forces. 
Anticipating the movement of troops and equipment from the east coast of North America across the Atlantic and the European continent has never been so critical for an effective deterrence of Allied territory.",[73,623,624],{},"This initiative is a great example of how NATO aims to bridge the gap between what the industry can offer, and the military expertise in order to further improve the safety of the Alliance’s one billion citizens.",{"title":23,"searchDepth":24,"depth":24,"links":626},[],"Discover how Joint Support and Enabling Command (JSEC) collaborates with AI company Pathway during Steadfast Foxtrot 2024 to advance NATO's data processing and simulation capabilities, enhancing military operations and resilience in Eastern Europe",{"layout":90,"thumbnail":629,"tags":631,"date":633},{"src":630,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fjsec-pathway-th.png",[94,632],"case-study","2024-11-13","\u002Fnews\u002Fjsec-pathway-ai-collaboration-steadfast-foxtrot-2024",{"title":587,"description":627},{"loc":634},"9.news\u002F856.jsec-pathway-ai-collaboration-steadfast-foxtrot-2024","cVEuEEcGva-IWzGMlhvEqvWwJqa1RpGqT51yM5oC_do",{"id":640,"title":641,"author":642,"body":643,"description":717,"extension":27,"meta":718,"navigation":35,"path":723,"seo":724,"sitemap":725,"stem":726,"__hash__":727},"content\u002F9.news\u002F858.pathway-meetup-2024.md","The Future of Large Language Models by Lukasz Kaiser and Jan Chorowski",{"id":7,"url":8,"name":9,"img":10,"provider":11},{"type":13,"value":644,"toc":714},[645,648,651,660,664,670,673,679,682,695,698,701],[68,646,641],{"id":647},"the-future-of-large-language-models-by-lukasz-kaiser-and-jan-chorowski",[73,649,650],{},"In April 2024, Pathway hosted an incredible meetup in San Francisco, bringing together some of the brightest minds in AI and data science.",[73,652,653,654,659],{},"We welcomed Łukasz Kaiser, co-author of 
",[77,655,658],{"href":656,"rel":657},"https:\u002F\u002Farxiv.org\u002Fabs\u002F1706.03762",[81],"\"Attention is All You Need\""," and Jan Chorowski, Pathway’s CTO, who shared their vision on the future of Large Language Models and the roadmap towards more intelligent foundational Large Language Models (LLMs). Joined by many senior developers, architects, and founders working on generative AI projects, they discussed the evolution of deep learning, the role of Reinforcement Learning with Human Feedback, the future of LLMs, and how achieving infinite LLM Context Windows can be made possible through innovative engineering and efficient retrieval mechanisms.",[140,661,663],{"id":662},"key-topics-covered-by-lukasz-kaiser-and-jan-chorowski","Key Topics Covered by Lukasz Kaiser and Jan Chorowski:",[665,666,667],"ol",{},[148,668,669],{},"Role of Retrievers in Reinforcement Learning for Intelligent LLMs\nŁukasz Kaiser, a renowned researcher at OpenAI who is a co-author of TensorFlow and Transformer Architecture as well as core contributor of Open AI’s GPT-4 and ChatGPT, explored the evolution and future of deep learning technologies and their future.",[73,671,672],{},"He emphasized that more data and compute lead to better results but highlighted the impending data scarcity. Łukasz discussed how in the future, training with fewer, high-quality retrieved data points will be the key to enhancing LLM performance. He also explained the importance of powerful retrieval mechanisms, integrating personal and organizational knowledge graphs, and efficient context provisioning for effective Reinforcement Learning with Human Feedback. 
Additionally, Łukasz mentioned a missed observation on parsing from his seminal paper \"Attention is All You Need,\" and shared his vision for future Large Language Models (LLMs).",[665,674,676],{"start":675},3,[148,677,678],{},"How Retrievers and LLMs Help Each Other and Achieving Infinite LLM Context Windows",[73,680,681],{},"Jan Chorowski, CTO of Pathway and a prominent figure in AI and NLP, extended the discussion by focusing on the essential role of context and retrieval in AI systems.",[73,683,684,685,689,690,694],{},"Building on Łukasz Kaiser's insights, he highlighted the \"yin and yang\" relationship between Large Language Models (LLMs) and retrieval systems. Effective LLM performance and reinforcement learning require robust retrieval mechanisms, and efficient retrieval relies on the processing power of LLMs. Jan shared an ",[77,686,688],{"href":687},"\u002Fdevelopers\u002Ftemplates\u002Frag\u002Fadaptive-rag","example of Adaptive Retrieval Augmented Generation (RAG)"," where they achieved great accuracy at a quarter of the cost by leveraging LLM preprocessing. He emphasized the need for tighter integration to achieve infinite LLM Context Windows and cost-effective AI solutions, inviting the audience to explore these concepts further with resources and examples at ",[77,691,693],{"href":692},"\u002Fdevelopers\u002Fuser-guide\u002Fintroduction\u002Fwelcome","Pathway's developer site",".",[73,696,697],{},"Watch the recording:",[126,699],{"src":700},"https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=_7VirEqCZ4g",[145,702,703,706],{},[148,704,705],{},"​Talk 1: Deep Learning Past and Future: What Comes After GPT? by Lukasz Kaiser (“Attention is All you Need” co-author, Senior Researcher at OpenAI)",[148,707,708,709,713],{},"Talk 2: Taming Unstructured Data: Which Indexing Strategy Wins? 
by Jan Chorowski (CTO, ",[77,710,712],{"href":711},"\u002F","Pathway",")",{"title":23,"searchDepth":24,"depth":24,"links":715},[716],{"id":662,"depth":24,"text":663},"Key to Reinforcement Learning (RL) in LLMs as data gets scarce? Insights from Transformer co-inventor and Pathway's CTO at our Bay Area Meetup",{"layout":90,"thumbnail":719,"tags":721,"date":722},{"src":720,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fpathway-meetup-th.jpg",[94],"2024-04-30","\u002Fnews\u002Fpathway-meetup-2024",{"title":641,"description":717},{"loc":723},"9.news\u002F858.pathway-meetup-2024","FiRw-86KNah1fnIOBFP78zbKyAHFfP2Nc-yxW5TP6PQ",{"id":729,"title":730,"author":731,"body":732,"description":746,"extension":27,"meta":747,"navigation":35,"path":752,"seo":753,"sitemap":754,"stem":755,"__hash__":756},"content\u002F9.news\u002F862.cdo-magazine.md","How Businesses Can Create Data Frameworks for Real-world AI",{"id":312,"url":313,"name":314,"description":315,"img":316,"provider":11,"linkedin":317},{"type":13,"value":733,"toc":744},[734,736,742],[68,735,71],{"id":70},[73,737,75,738,82],{},[77,739,740],{"href":740,"rel":741},"https:\u002F\u002Fwww.cdomagazine.tech\u002Faiml\u002Fhow-businesses-can-create-data-frameworks-for-real-world-ai?utm_content=277910962&utm_medium=social&utm_source=linkedin&hss_channel=lcp-40830869",[81],[84,743],{"url":740},{"title":23,"searchDepth":24,"depth":24,"links":745},[],"How to make #RealTimeAI a reality? 
Zuzanna Stamirowska sheds light on the potential of unifying batch, streaming, and data workflows to derive value from data in real time for timely decisions",{"redirection":35,"thumbnail":748,"tags":750,"date":751,"hidden":35},{"src":749,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fcdo-magazine-th.png",[94],"2024-02-09","\u002Fnews\u002Fcdo-magazine",{"title":730,"description":746},{"loc":752},"9.news\u002F862.cdo-magazine","46Hhlp7iLHc2mwsuyzRcLpFM0HhI9pfOOUIjviV_vb0",{"id":758,"title":759,"author":760,"body":765,"description":806,"extension":27,"meta":807,"navigation":35,"path":812,"seo":813,"sitemap":814,"stem":815,"__hash__":816},"content\u002F9.news\u002F864.modern-data-stack.md","Client Testimonial: La Poste at Modern Data Stack",{"name":761,"img":762,"provider":11,"linkedin":763,"website":764},"Modern Data Stack","\u002Fassets\u002Fblog\u002Favatars\u002Fmodern-data-stack-av.png","https:\u002F\u002Fwww.linkedin.com\u002Fcompany\u002Fmodern-data-stack-france\u002F","https:\u002F\u002Fwww.meetup.com\u002Ffr-FR\u002Fmodern-data-stack-france\u002F",{"type":13,"value":766,"toc":804},[767,770,773,780,787,790,793],[68,768,759],{"id":769},"client-testimonial-la-poste-at-modern-data-stack",[73,771,772],{},"Jean-Paul Fabre, Head of Technological Innovation at the Group La Poste, will present how several analytical use cases - network optimization, asset utilization improvement, flow management, Paris 2024 Olympic Games preparation, etc - are enabled by a digital twin and a data model that combines batch and streaming data thanks to Pathway unified engine.",[73,774,775,779],{},[77,776,761],{"href":777,"rel":778},"https:\u002F\u002Fwww.linkedin.com\u002Fcompany\u002Fmodern-data-stack-france\u002Fposts\u002F?feedView=all",[81]," is a community for knowledge-sharing and networking around data thanks to cutting-edge tech.",[73,781,782,783,786],{},"In January, ",[77,784,761],{"href":777,"rel":785},[81]," will host in Criteo’s offices in Paris around 
streaming and the modern data stack.",[73,788,789],{},"In addition to La Poste, Decathlon, Michelin, BPCE, OVH Cloud and Christophe Blefari will also share their experience and use cases.",[73,791,792],{},"Sign up for the event on January 31st, in Paris - it’s free. Let us know if you are coming!",[73,794,795],{},[77,796,803],{"href":797,"rel":798,"className":799},"https:\u002F\u002Fdocs.google.com\u002Fforms\u002Fd\u002Fe\u002F1FAIpQLSd7R-EUtGDZtvknd5ImTrSE754XhY96KlZeY5Qd_8A9tfekkA\u002Fviewform",[81],[800,801,802],"button","button--secondary","button--secondary-text","Sign up for the event",{"title":23,"searchDepth":24,"depth":24,"links":805},[],"Optimizing Colissimo flows in real time with Pathway. Testimonial",{"layout":90,"thumbnail":808,"tags":810,"date":811,"hidden":35},{"src":809,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fmodern-data-stack-news-th.png",[94],"2023-12-01","\u002Fnews\u002Fmodern-data-stack",{"title":759,"description":806},{"loc":812},"9.news\u002F864.modern-data-stack","JVrpzBm_S2Sqxz_UyEuCLa-auivi9kutTmqtAY1Tvio",{"id":818,"title":819,"author":820,"body":824,"description":838,"extension":27,"meta":839,"navigation":35,"path":844,"seo":845,"sitemap":846,"stem":847,"__hash__":848},"content\u002F9.news\u002F866.eu-startup-news.md","Pathway named among the Top Startups Transforming the European business landscape",{"name":821,"description":23,"img":822,"provider":11,"website":823},"EU Startup News","\u002Fassets\u002Fblog\u002Favatars\u002Feustartup-news-avatar.png","https:\u002F\u002Feustartup.news\u002F",{"type":13,"value":825,"toc":836},[826,828,834],[68,827,71],{"id":70},[73,829,75,830,82],{},[77,831,832],{"href":832,"rel":833},"https:\u002F\u002Feustartup.news\u002Fwhich-french-b2b-startups-are-transforming-the-european-business-landscape\u002F",[81],[84,835],{"url":832},{"title":23,"searchDepth":24,"depth":24,"links":837},[],"Which French B2B Startups Are Transforming the European Business 
Landscape?",{"layout":90,"redirection":35,"thumbnail":840,"tags":842,"date":843,"hidden":35},{"src":841,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Feustartup-news-th.png",[94],"2023-10-16","\u002Fnews\u002Feu-startup-news",{"title":819,"description":838},{"loc":844},"9.news\u002F866.eu-startup-news","rYlBzCuz5KQDJI3Yq7omWvailj9OkmWVBOlyP50Yibw",{"id":850,"title":851,"author":852,"body":856,"description":919,"extension":27,"meta":920,"navigation":35,"path":929,"seo":930,"sitemap":931,"stem":932,"__hash__":933},"content\u002F9.news\u002F867.gartner-a-n-di-solutions.md","Pathway is featured as a best-suited vendor candidate for Analytics and Decision Intelligence solutions for Supply Chain by Gartner",{"name":853,"img":854,"website":855},"Gartner","\u002Fassets\u002Fcontent\u002Fblog\u002Fgartner-avatar.png","https:\u002F\u002Fwww.gartner.com\u002Fmyhomepage",{"type":13,"value":857,"toc":917},[858,862,865,873,876,902,905,908],[68,859,861],{"id":860},"pathway-was-selected-as-a-vendor-candidate-for-analytics-decision-intelligence-adi-for-supply-chain","Pathway was selected as a vendor candidate for analytics & decision intelligence (A&DI) for Supply Chain.",[73,863,864],{},"This Tool released by Gartner has been designed to support supply chain technology leaders in identifying suitable and best-fit supply chain analytics and decision intelligence vendor candidates for their software evaluation process.",[73,866,867,868,872],{},"At Pathway we are proud to ",[77,869,871],{"href":870},"\u002Fframework\u002Fsolutions\u002Flogistics","enable real-time intelligence in Logistics"," and Supply Chain. 
With Pathway, get value in under 24 hours: gather your data, get immediately a coherent data model you can work with, and access insights on the fly.",[73,874,875],{},"Read more about how Pathway has been designed for",[145,877,878,884,890,896],{},[148,879,880],{},[77,881,883],{"href":882},"\u002Fframework\u002Fsolutions\u002Flogistics#operations","Operations",[148,885,886],{},[77,887,889],{"href":888},"\u002Fframework\u002Fsolutions\u002Flogistics#iot-deployment-experts","IoT deployment experts",[148,891,892],{},[77,893,895],{"href":894},"\u002Fframework\u002Fsolutions\u002Flogistics#risk-insurance-security","Risk, Insurance and Security teams",[148,897,898],{},[77,899,901],{"href":900},"\u002Fframework\u002Fsolutions\u002Flogistics#digital-data-teams","Digital & Data teams",[73,903,904],{},"… to address business problems in logistics and supply chain at scale.",[73,906,907],{},"Pathway already works with leaders in the market, such as CMA CGM, DB Schenker or La Poste.",[73,909,910,911,916],{},"For Gartner clients, feel free to download the full Excel spreadsheet ",[77,912,915],{"href":913,"rel":914},"https:\u002F\u002Fbit.ly\u002F3SqkBj0",[81],"Tool: Identify A&DI Solutions for Supply Chain",", and reach out!",{"title":23,"searchDepth":24,"depth":24,"links":918},[],"Gartner just released its Tool to support supply chain technology leaders in identifying suitable and best-fit supply chain analytics and decision intelligence (A&DI) vendors 
candidates",{"layout":90,"thumbnail":921,"tags":923,"date":924,"enterprise":35,"related":925,"hidden":35},{"src":922,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fgartner-th.png",[94],"2023-10-23",[926,927,928],"\u002Fblog\u002Fgartner","\u002Fnews\u002Fgartner-market-guide-supply-chain","\u002Fblog\u002Fmarket-guide-event-stream-processing","\u002Fnews\u002Fgartner-a-n-di-solutions",{"title":851,"description":919},{"loc":929},"9.news\u002F867.gartner-a-n-di-solutions","psRZK2ZD0FCpgRGY_thl0SzMtHL94bgaRqileQlsRbA",{"id":935,"title":936,"author":937,"body":940,"description":954,"extension":27,"meta":955,"navigation":35,"path":960,"seo":961,"sitemap":962,"stem":963,"__hash__":964},"content\u002F9.news\u002F868.tech-informed-article.md","A coffee with… Zuzanna Stamirowska",{"name":248,"description":23,"img":938,"provider":11,"website":939},"\u002Fassets\u002Fblog\u002Favatars\u002Ftechinformed-avatar.png","https:\u002F\u002Ftechinformed.com\u002F",{"type":13,"value":941,"toc":952},[942,944,950],[68,943,71],{"id":70},[73,945,75,946,82],{},[77,947,948],{"href":948,"rel":949},"https:\u002F\u002Ftechinformed.com\u002Fa-coffee-with-zuzanna-stamirowska\u002F",[81],[84,951],{"url":948},{"title":23,"searchDepth":24,"depth":24,"links":953},[],"The strategy polymath and chief exec of real-time data analytics firm Pathway on data pipeline complexity, injecting privacy into large language models and meeting Nobel Prize-winning mathematician John 
Nash",{"layout":90,"redirection":35,"thumbnail":956,"tags":958,"date":959,"hidden":35},{"src":957,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fcoffe-with-th.png",[94],"2023-10-11","\u002Fnews\u002Ftech-informed-article",{"title":936,"description":954},{"loc":960},"9.news\u002F868.tech-informed-article","0szxkJa6F5YgejX5x7aM8sM0yQFOC4Ry6XspxXN21Ww",{"id":966,"title":967,"author":968,"body":969,"description":983,"extension":27,"meta":984,"navigation":35,"path":989,"seo":990,"sitemap":991,"stem":992,"__hash__":993},"content\u002F9.news\u002F869.european-financial-review.md","Building Data Frameworks for Real-time AI Applications",{"id":312,"url":313,"name":314,"description":315,"img":316,"provider":11,"linkedin":317},{"type":13,"value":970,"toc":981},[971,973,979],[68,972,71],{"id":70},[73,974,75,975,82],{},[77,976,977],{"href":977,"rel":978},"https:\u002F\u002Fwww.europeanfinancialreview.com\u002Fbuilding-data-frameworks-for-real-time-ai-applications\u002F",[81],[84,980],{"url":977},{"title":23,"searchDepth":24,"depth":24,"links":982},[],"Zuzanna Stamirowska, Co-founder and CEO of Pathway, wrote an article explaining the new paradigm of real-time AI applications for financial services organizations, which hold the promise of delivering faster, smart and more efficient processes.",{"redirection":35,"thumbnail":985,"tags":987,"date":988,"hidden":35},{"src":986,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Feuropean-financial-review-th.png",[94],"2023-09-24","\u002Fnews\u002Feuropean-financial-review",{"title":967,"description":983},{"loc":989},"9.news\u002F869.european-financial-review","fCHz0-hkdrUrwwiMr7H6BRNOh-qbOaVufmBzqVArkQ4",{"id":995,"title":996,"author":997,"body":1001,"description":1015,"extension":27,"meta":1016,"navigation":35,"path":1021,"seo":1022,"sitemap":1023,"stem":1024,"__hash__":1025},"content\u002F9.news\u002F871.wearewomen-article.md","Enabling AI to unlearn and self-correct like a 
human",{"name":998,"description":23,"img":999,"website":1000},"We Are Tech Women","\u002Fassets\u002Fcontent\u002Fblog\u002Favatars\u002Fwearetechwomen-avatar.png","https:\u002F\u002Fwearetechwomen.com\u002F",{"type":13,"value":1002,"toc":1013},[1003,1005,1011],[68,1004,71],{"id":70},[73,1006,75,1007,82],{},[77,1008,1009],{"href":1009,"rel":1010},"https:\u002F\u002Fwearetechwomen.com\u002Fenabling-ai-to-unlearn-and-self-correct-iike-a-human\u002F",[81],[84,1012],{"url":1009},{"title":23,"searchDepth":24,"depth":24,"links":1014},[],"Pathway developed the breakthrough capability to combine batch and streaming logic in the same workflow, AI systems can now be continuously trained or updated with new streaming data, with revisions made to certain data points without requiring a full batch data upload",{"layout":90,"redirection":35,"thumbnail":1017,"tags":1019,"date":1020,"hidden":35},{"src":1018,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fwearetechwomen-th.png",[94],"2023-08-31","\u002Fnews\u002Fwearewomen-article",{"title":996,"description":1015},{"loc":1021},"9.news\u002F871.wearewomen-article","n-jTcCcD9tgvQmXMbF6uRL9oHXW3Phg-qUi1Hroi5jU",{"id":1027,"title":1028,"author":1029,"body":1034,"description":1048,"extension":27,"meta":1049,"navigation":35,"path":1055,"seo":1056,"sitemap":1057,"stem":1058,"__hash__":1059},"content\u002F9.news\u002F873.les-echos-deeptech.md","Pathway quoted in Les Echos: Deeptech - the answer to tomorrow's challenges",{"name":1030,"description":1031,"img":1032,"website":1033},"Les Echos","Economic and financial news from 
France","\u002Fassets\u002Fcontent\u002Fblog\u002FLesEchos_icon.webp","https:\u002F\u002Fwww.lesechos.fr\u002F",{"type":13,"value":1035,"toc":1046},[1036,1038,1044],[68,1037,71],{"id":70},[73,1039,75,1040,82],{},[77,1041,1042],{"href":1042,"rel":1043},"https:\u002F\u002Fwww.lesechos.fr\u002Fidees-debats\u002Fcercle\u002Fopinion-la-deeptech-est-la-reponse-aux-defis-de-demain-1972505",[81],[84,1045],{"url":1042},{"title":23,"searchDepth":24,"depth":24,"links":1047},[],"Despite the uncertainty surrounding its financial viability, French VC partner, Pierre-Eric Leibovici, believes that deeptech can enable entrepreneurs to play a key role in resolving environmental and societal challenges",{"layout":90,"redirection":35,"lang":1050,"thumbnail":1051,"tags":1053,"date":1054,"hidden":35},"french",{"src":1052,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Flesechos-th.png",[94],"2023-08-25","\u002Fnews\u002Fles-echos-deeptech",{"title":1028,"description":1048},{"loc":1055},"9.news\u002F873.les-echos-deeptech","itJBiHUJHpr4yDPzfdCjO0aTvJP5s5wTORCzYu-Bk_8",{"id":1061,"title":1062,"author":1063,"body":1068,"description":1082,"extension":27,"meta":1083,"navigation":35,"path":1088,"seo":1089,"sitemap":1090,"stem":1091,"__hash__":1092},"content\u002F9.news\u002F874.financial-times-skeptical-case.md","Pathway quoted in the FT: The skeptical case on generative AI",{"name":1064,"description":1065,"img":1066,"website":1067},"Financial Times","Worldʼs leading global business publication","\u002Fassets\u002Fcontent\u002Fblog\u002Ffinancial-times-avatar.png","https:\u002F\u002Fwww.ft.com\u002F",{"type":13,"value":1069,"toc":1080},[1070,1072,1078],[68,1071,71],{"id":70},[73,1073,75,1074,82],{},[77,1075,1076],{"href":1076,"rel":1077},"https:\u002F\u002Fwww.ft.com\u002Fcontent\u002Fed323f48-fe86-4d22-8151-eed15581c337",[81],[84,1079],{"url":1076},{"title":23,"searchDepth":24,"depth":24,"links":1081},[],"John Thornhill highlights how investors are betting on companies that can 
deploy #GenAI models to solve real-world problems, giving the French startup Pathway as an example",{"redirection":35,"layout":90,"thumbnail":1084,"tags":1086,"date":1087,"hidden":35},{"src":1085,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Ffinancial-times-th.png",[94],"2023-08-17","\u002Fnews\u002Ffinancial-times-skeptical-case",{"title":1062,"description":1082},{"loc":1088},"9.news\u002F874.financial-times-skeptical-case","QIaIsM2AiPeXBIcdOdQenHsMmnqSdqE7wNPh0YVZVxY",{"id":1094,"title":1095,"author":1096,"body":1098,"description":1155,"extension":27,"meta":1156,"navigation":35,"path":1161,"seo":1162,"sitemap":1163,"stem":1164,"__hash__":1165},"content\u002F9.news\u002F877.maddyness-gen-ai-mapping.md","Pathway named as a promising Generative AI leader (in French)",{"name":346,"img":1097,"website":348},"\u002Fassets\u002Fcontent\u002Fblog\u002Fmaddyness-avatar.png",{"type":13,"value":1099,"toc":1153},[1100,1103,1116,1124,1131,1134,1140,1146],[68,1101,1095],{"id":1102},"pathway-named-as-a-promising-generative-ai-leader-in-french",[73,1104,1105,1106,1111,1112,713],{},"The generative AI market was worth almost $40 billion in 2022 and should approach $70 billion by the end of this year, according to ",[77,1107,1110],{"href":1108,"rel":1109},"https:\u002F\u002Fwww.bloomberg.com\u002Fcompany\u002Fpress\u002Fgenerative-ai-to-become-a-1-3-trillion-market-by-2032-research-finds\u002F",[81],"Bloomberg Intelligence",". 
And this is just the beginning, as the market is expected to reach $1,300 billion by 2032 (Source: ",[77,1113,1115],{"href":1108,"rel":1114},[81],"Bloomberg: Generative AI to Become a $1.3 Trillion Market by 2032, Research Finds.",[73,1117,1118,1123],{},[77,1119,1122],{"href":1120,"rel":1121},"https:\u002F\u002Fwww.resonance.vc\u002F",[81],"Resonance Venture",", a French venture capital firm, released a mapping of the main GenAI players in France, including established French companies such as Hugging Face.",[73,1125,1126,1130],{},[77,1127,712],{"href":1128,"rel":1129},"https:\u002F\u002Fpathway.com\u002F",[81]," is the single, integrated processing layer for real-time intelligence. It allows easy mix-and-match of batch, streaming, and LLM architectures - all within one engine.",[73,1132,1133],{},"Real-time learning is made possible by an effective and scalable engine, which powers LLMs and machine learning models. These models are automatically updated thanks to a framework that combines streaming and batch data, and which is user-friendly and flexible for developers, data engineers, and data scientists. Leading experts in the field of artificial intelligence make up the team, which is headed by Zuzanna Stamirowska. They include CTO Jan Chorowski, a co-author of Geoff Hinton and Yoshua Bengio, as well as Business Angel Lukasz Kaiser, who co-authored TensorFlow and is also known as the \"T\" in ChatGPT.",[73,1135,1136,1139],{},[169,1137,1138],{},"Zuzanna Stamirowska, CEO & Co-Founder of Pathway",", comments: “Our mission has been to enable real-time data processing, while giving developers a simple experience regardless of whether they work with batch, streaming, or LLM systems. 
Pathway is truly facilitating the convergence of historical and real-time data for the first time.”",[1141,1142],"article-img",{":zoomable":1143,"alt":1144,"src":1145},"true","A list of companies in the ecosystem where Pathway has a place in data preparation","assets\u002Fcontent\u002Fblog\u002Fecosysteme-gen-ai-francais.png",[73,1147,1148,1149],{},"Read the full article on Maddyness:\n",[77,1150,1151],{"href":1151,"rel":1152},"https:\u002F\u002Fwww.maddyness.com\u002F2023\u002F07\u002F21\u002Ffrance-europe-ia-generative\u002F",[81],{"title":23,"searchDepth":24,"depth":24,"links":1154},[],"Maddyness reposted a mapping of future European GenAI leaders",{"layout":90,"thumbnail":1157,"tags":1159,"date":1160,"hidden":35},{"src":1158},"\u002Fassets\u002Fcontent\u002Fblog\u002Fmaddyness-gen-ai-mapping-th.png",[94],"2023-07-26","\u002Fnews\u002Fmaddyness-gen-ai-mapping",{"title":1095,"description":1155},{"loc":1161},"9.news\u002F877.maddyness-gen-ai-mapping","Eihm6ie265YMlNhHk1f07Qd20bUSgCAPSi1bGAZB4Gw",{"id":1167,"title":1168,"author":1169,"body":1170,"description":1184,"extension":27,"meta":1185,"navigation":35,"path":1189,"seo":1190,"sitemap":1191,"stem":1192,"__hash__":1193},"content\u002F9.news\u002F878.maddyness-article-about-pathway.md","French deep tech start-up announces the general launch of its data processing engine",{"name":346,"img":347,"provider":11,"website":348},{"type":13,"value":1171,"toc":1182},[1172,1174,1180],[68,1173,71],{"id":70},[73,1175,75,1176,82],{},[77,1177,1178],{"href":1178,"rel":1179},"https:\u002F\u002Fwww.maddyness.com\u002F2023\u002F07\u002F26\u002Fpathway-ia\u002F",[81],[84,1181],{"url":1178},{"title":23,"searchDepth":24,"depth":24,"links":1183},[],"Comment Pathway veut permettre aux IA d’apprendre et «d’oublier» en temps 
réel",{"redirection":35,"lang":1050,"thumbnail":1186,"tags":1188,"date":1160,"hidden":35},{"src":1187},"\u002Fassets\u002Fcontent\u002Fblog\u002Fmaddyness-th.png",[94],"\u002Fnews\u002Fmaddyness-article-about-pathway",{"title":1168,"description":1184},{"loc":1189},"9.news\u002F878.maddyness-article-about-pathway","6bw1TzK3-BpHJ_7S92d75rmNoDj1q17ICFs0s1cz7Yc",{"id":1195,"title":1196,"author":1197,"body":1201,"description":1215,"extension":27,"meta":1216,"navigation":35,"path":1220,"seo":1221,"sitemap":1222,"stem":1223,"__hash__":1224},"content\u002F9.news\u002F879.nextweb-article.md","AI startup launches ‘fastest data processing engine’ on the market",{"name":1198,"img":1199,"linkedin":1200},"The Next Web","\u002Fassets\u002Fcontent\u002Fblog\u002Fthenextweb-avatar.png","https:\u002F\u002Fthenextweb.com\u002F",{"type":13,"value":1202,"toc":1213},[1203,1205,1211],[68,1204,71],{"id":70},[73,1206,75,1207,82],{},[77,1208,1209],{"href":1209,"rel":1210},"https:\u002F\u002Fthenextweb.com\u002Fnews\u002Fai-startup-launches-fastest-data-processing-engine-market",[81],[84,1212],{"url":1209},{"title":23,"searchDepth":24,"depth":24,"links":1214},[],"Female-led Pathway says its system can 'forget' in real-time, like a human",{"redirection":35,"thumbnail":1217,"tags":1219,"date":1160,"hidden":35},{"src":1218},"\u002Fassets\u002Fcontent\u002Fblog\u002Fthenextweb-th.png",[94],"\u002Fnews\u002Fnextweb-article",{"title":1196,"description":1215},{"loc":1220},"9.news\u002F879.nextweb-article","C8KvjMP7iX7Mb6iHM6TrsS7HoN3kLvL_NofgN_RZmfY",{"id":1226,"title":1227,"author":1228,"body":1231,"description":1245,"extension":27,"meta":1246,"navigation":35,"path":1251,"seo":1252,"sitemap":1253,"stem":1254,"__hash__":1255},"content\u002F9.news\u002F881.le-point.md","Pathway CEO featured in the ranking of the next generation of geniuses by the French national weekly Le Point",{"name":1229,"img":1230},"Le 
Point","\u002Fassets\u002Fcontent\u002Fblog\u002Fle-point-avatar.png",{"type":13,"value":1232,"toc":1243},[1233,1235,1241],[68,1234,71],{"id":70},[73,1236,75,1237,82],{},[77,1238,1239],{"href":1239,"rel":1240},"https:\u002F\u002Fwww.lepoint.fr\u002Fsciences-nature\u002Fpalmares-des-inventeurs-du-point-la-releve-du-genie-francais-22-06-2023-2525696_1924.php",[81],[84,1242],{"url":1239},{"title":23,"searchDepth":24,"depth":24,"links":1244},[],"An exceptional jury (including Alain Aspect, the 2022 Nobel Prize in Physics) has selected Pathway among the teams whose breakthroughs will change our lives.",{"layout":90,"redirection":35,"thumbnail":1247,"tags":1249,"date":1250,"lang":1050,"hidden":35},{"src":1248},"\u002Fassets\u002Fcontent\u002Fblog\u002Fle-point-th.png",[94],"2023-06-22","\u002Fnews\u002Fle-point",{"title":1227,"description":1245},{"loc":1251},"9.news\u002F881.le-point","OUZIcSjAo36ly-2a3b9vVD5VujQtcdg1FYAt1ZO5Cuo",{"id":1257,"title":1258,"author":1259,"body":1261,"description":1306,"extension":27,"meta":1307,"navigation":35,"path":927,"seo":1311,"sitemap":1312,"stem":1313,"__hash__":1314},"content\u002F9.news\u002F882.gartner-market-guide-supply-chain.md","Pathway is a Representative Vendor in Gartner 2023 Market Guide for Analytics and Decision Intelligence Platforms in Supply Chain",{"name":853,"img":854,"website":1260},"https:\u002F\u002Fwww.gartner.com\u002Faccount\u002Fsignin?method=initialize&TARGET=https%3A%2F%2Fwww.gartner.com%2Fmyhomepage",{"type":13,"value":1262,"toc":1304},[1263,1267,1270,1273,1287,1295],[68,1264,1266],{"id":1265},"pathway-was-selected-as-a-representative-vendor-in-the-2023-gartner-market-guide-for-analytics-and-decision-intelligence-platforms-in-supply-chain","Pathway was selected as a Representative Vendor in the 2023 Gartner Market Guide for Analytics and Decision Intelligence Platforms in Supply Chain.",[73,1268,1269],{},"According to Gartner analysts Christian Titze and Noha Tohamy, ”By 2026, 50% of organizations will have 
to evaluate analytics and business intelligence (ABI) and data science and machine learning (DSML) platforms as a single platform due to market convergence.”",[73,1271,1272],{},"We are proud to enable industry leaders to “achieve contextualized, connected, and continuous insights” through:",[145,1274,1275,1281],{},[148,1276,1277,1280],{},[169,1278,1279],{},"Pathway Live Data Framework",": the most powerful data processing framework, currently used for real-time anomaly detection, predictive analytics, IoT and logs data observability, recommender systems, and alerting, and which works particularly well with data in motion: data tables, live events data, etc.",[148,1282,1283,1286],{},[169,1284,1285],{},"Pathway Logistics App",": our lighthouse data platform built in the Pathway Live Data Framework. It is a one-stop-shop cloud-based application to provide immediately actionable insights on top of data for logistics assets, including IoT data and status data.",[73,1288,1289,1290,1294],{},"With “functional teams ",[1291,1292,1293],"span",{},"..."," looking to speed up cross-functional decision making on the basis of more near-real-time and broader datasets”, Pathway is best positioned to deliver value to Enterprise clients",[73,1296,1297,1298,1303],{},"For Gartner clients, feel free to read the full ",[77,1299,1302],{"href":1300,"rel":1301},"https:\u002F\u002Fwww.gartner.com\u002Fdocument\u002F4478399?ref=solrAll&refval=374406409&",[81],"Gartner Market Guide"," for Analytics and Decision Intelligence Platforms in Supply Chain, and do reach out!",{"title":23,"searchDepth":24,"depth":24,"links":1305},[],"Gartner published its latest edition of its Market Guide for Analytics and Decision Intelligence Platforms in Supply Chain, and named Pathway a Representative 
Vendor",{"layout":90,"thumbnail":1308,"tags":1309,"date":1310,"enterprise":35,"hidden":35},{"src":922,"provider":11},[94],"2023-06-26",{"title":1258,"description":1306},{"loc":927},"9.news\u002F882.gartner-market-guide-supply-chain","tP3vo6GbxAauQ5f6cpZIUu67fOh2QoDy98dFadYvPFo",{"id":1316,"title":1317,"author":1318,"body":1319,"description":1352,"extension":27,"meta":1353,"navigation":35,"path":1358,"seo":1359,"sitemap":1360,"stem":1361,"__hash__":1362},"content\u002F9.news\u002F883.vivatech-by-the-french-prime.md","Pathway awarded at VivaTech by the French Prime Minister Elisabeth Borne",{"id":312,"url":313,"name":314,"description":315,"img":316,"provider":11,"linkedin":317},{"type":13,"value":1320,"toc":1350},[1321,1324,1327,1330,1334,1341,1344,1347],[68,1322,1317],{"id":1323},"pathway-awarded-at-vivatech-by-the-french-prime-minister-elisabeth-borne",[73,1325,1326],{},"Pathway is proud to announce that Zuzanna Stamirowska, CEO at Pathway was awarded at Viva Technology, Europe’s biggest tech event, held in Paris, France.",[73,1328,1329],{},"Elisabeth Borne, the French Prime Minister awarded Zuzanna Stamirowska for her performance on stage and the achievements of Pathway as the most powerful data processing framework to power real-time data products and pipelines. This happened a few weeks after the CIO of Goldman Sachs declared that “going from batch to real-time (processing) was like going from printed newspapers to the Internet.\"",[1331,1332],"tweet",{"tweet-url":1333},"https:\u002F\u002Ftwitter.com\u002FElisabeth_Borne\u002Fstatus\u002F1669798321550925837",[73,1335,1336,1337,1340],{},"Pathway was brought to life by a stellar team: the CTO Jan Chorowski worked with the Godfathers of AI, Geoff Hinton, and Yoshua Bengio, the CSO Adrian Kosowski had his Ph.D. at 20 and is a world-class expert in high-scale distributed computing, and ",[77,1338,314],{"href":317,"rel":1339},[81]," is the author of the state of the art model for forecasting of maritime trade. 
Pathway is supported by business angels such as Lukasz Kaiser, known to be behind the “T” in GPT.",[73,1342,1343],{},"“Very soon real-time will become the norm for data processing and it’s a game changer for everybody starting from financial services, Formula 1,  supply chains, online marketing, retail, energy…  the list goes on.” declared Zuzanna Stamirowska during her pitch in front of the Viva Tech assembly.",[73,1345,1346],{},"Watch Pathway Winning Pitch",[126,1348],{"src":1349},"https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=iSRUMsM15uw",{"title":23,"searchDepth":24,"depth":24,"links":1351},[],"The real-time revolution starts now. Zuzanna Stamirowska, CEO at Pathway was awarded by the French Prime Minister at Viva Technology",{"layout":90,"thumbnail":1354,"tags":1356,"date":1357,"hidden":35},{"src":1355},"\u002Fassets\u002Fcontent\u002Fblog\u002Fvivatech-by-the-french-prime-th.jpg",[94,237],"2023-06-16","\u002Fnews\u002Fvivatech-by-the-french-prime",{"title":1317,"description":1352},{"loc":1358},"9.news\u002F883.vivatech-by-the-french-prime","rwC0oXE8zilyf-Z1rReNA22z_4cCppRnSJvxXNiODIQ",{"id":1364,"title":1365,"author":1366,"body":1369,"description":1383,"extension":27,"meta":1384,"navigation":35,"path":1389,"seo":1390,"sitemap":1391,"stem":1392,"__hash__":1393},"content\u002F9.news\u002F885.paris-saclay.md","Interview for Paris-Saclay",{"name":1367,"description":23,"img":1368},"Paris-Saclay","\u002Fassets\u002Fcontent\u002Fblog\u002Fparis-saclay-avatar.png",{"type":13,"value":1370,"toc":1381},[1371,1373,1379],[68,1372,71],{"id":70},[73,1374,75,1375,82],{},[77,1376,1377],{"href":1377,"rel":1378},"https:\u002F\u002Fepa-paris-saclay.fr\u002Factualites-et-decryptages\u002Ftoutes-nos-publications\u002Ftraitement-de-donnees-en-temps-reel-la-voie-pathway\u002F",[81],[84,1380],{"url":1377},{"title":23,"searchDepth":24,"depth":24,"links":1382},[],"Traitement de données en temps réel, la voie 
Pathway",{"layout":90,"redirection":35,"thumbnail":1385,"tags":1387,"date":1388,"lang":1050,"hidden":35},{"src":1386},"\u002Fassets\u002Fcontent\u002Fblog\u002Fparis-saclay-th.png",[94],"2023-05-15","\u002Fnews\u002Fparis-saclay",{"title":1365,"description":1383},{"loc":1389},"9.news\u002F885.paris-saclay","O251ObVwyoWAaDWKZxIy2oUkJsFD6lvwL8etWTRTpM4",{"id":1395,"title":1396,"author":1397,"body":1398,"description":1414,"extension":27,"meta":1415,"navigation":35,"path":1420,"seo":1421,"sitemap":1422,"stem":1423,"__hash__":1424},"content\u002F9.news\u002F890.LesEchosdeeptechportrait.md","Pathway in Les Echos - CEO Portrait",{"name":1030,"img":1032},{"type":13,"value":1399,"toc":1412},[1400,1402,1409],[68,1401,71],{"id":70},[73,1403,75,1404,1408],{},[77,1405,1406],{"href":1406,"rel":1407},"https:\u002F\u002Fwww.lesechos.fr\u002Fstart-up\u002Fportraits\u002Fces-chercheurs-qui-ont-decide-de-fonder-une-start-up-dans-la-deeptech-1895133",[81]," in a moment.",[84,1410],{"url":1411},"https:\u002F\u002Fwww.lesechos.fr\u002Fstart-up\u002Fportraits\u002Fces-chercheurs-qui-ont-decide-de-fonder-une-start-up-dans-la-deeptech-1895133",{"title":23,"searchDepth":24,"depth":24,"links":1413},[],"You will be taken to https:\u002F\u002Fwww.lesechos.fr\u002Fstart-up\u002Fportraits\u002Fces-chercheurs-qui-ont-decide-de-fonder-une-start-up-dans-la-deeptech-1895133 in a 
moment.",{"layout":90,"redirection":35,"date":1416,"thumbnail":1417,"lang":1050,"tags":1419,"hidden":35},"2023-01-09",{"src":1418,"contain":35},"\u002Fassets\u002Fcontent\u002Fblog\u002FLes_echos_(logo).svg.png",[94],"\u002Fnews\u002Flesechosdeeptechportrait",{"title":1396,"description":1414},{"loc":1420},"9.news\u002F890.LesEchosdeeptechportrait","pZORk4p2FH6u3MrAcCO5EhQ2ELpdd460uq-AmbQUmG8",{"id":1426,"title":1427,"author":1428,"body":1430,"description":1444,"extension":27,"meta":1445,"navigation":35,"path":1450,"seo":1451,"sitemap":1452,"stem":1453,"__hash__":1454},"content\u002F9.news\u002F900.female-led-deeptech-startup.md","Female-led deeptech startup Pathway announces its $4.5m pre-seed round",{"name":62,"img":1429},"\u002Fassets\u002Fcontent\u002Fblog\u002Favatars\u002Fsifted-av.png",{"type":13,"value":1431,"toc":1442},[1432,1434,1440],[68,1433,71],{"id":70},[73,1435,75,1436,82],{},[77,1437,1438],{"href":1438,"rel":1439},"https:\u002F\u002Fsifted.eu\u002Farticles\u002Ffemale-led-deeptech-pathway-ai\u002F",[81],[84,1441],{"url":1438},{"title":23,"searchDepth":24,"depth":24,"links":1443},[],"The French startup helps companies with real-time analysis of fast-moving data",{"layout":90,"redirection":35,"date":1446,"thumbnail":1447,"tags":1449,"hidden":35},"2022-12-06",{"src":1448},"https:\u002F\u002Fimages.sifted.eu\u002Fwp-content\u002Fuploads\u002F2022\u002F12\u002F05160857\u002FPathway-Zuzanna-CEO-and-Claire-COO-scaled-e1670263090806.jpg?w=2048&h=1054&q=75&fit=crop&auto=compress,format",[94],"\u002Fnews\u002Ffemale-led-deeptech-startup",{"title":1427,"description":1444},{"loc":1450},"9.news\u002F900.female-led-deeptech-startup","bWvQ4kdATzaURG-JnB5xocilq2oWQ19d2CtWpvZOr1g",{"id":1456,"title":1457,"author":1458,"body":1461,"description":1457,"extension":27,"meta":1475,"navigation":35,"path":1480,"seo":1481,"sitemap":1482,"stem":1483,"__hash__":1484},"content\u002F9.news\u002F944.BFMTV.md","Pathway on BFM Business - the French Business TV 
channel",{"name":1459,"img":1460},"BFM Business","\u002Fassets\u002Fcontent\u002Fblog\u002FBFM_icon.jpg",{"type":13,"value":1462,"toc":1473},[1463,1465,1471],[68,1464,71],{"id":70},[73,1466,75,1467,82],{},[77,1468,1469],{"href":1469,"rel":1470},"https:\u002F\u002Fwww.bfmtv.com\u002Feconomie\u002Freplay-emissions\u002Ftech-and-co\u002Fparis-saclay-spring-2022-quelles-sont-les-cinq-start-up-primees-19-05_VN-202205190687.html",[81],[84,1472],{"url":1469},{"title":23,"searchDepth":24,"depth":24,"links":1474},[],{"layout":90,"redirection":35,"date":1476,"thumbnail":1477,"tags":1479,"hidden":35},"2022-05-30",{"src":1478,"contain":35},"\u002Fassets\u002Fcontent\u002Fblog\u002FBFM-Business-Logo.png",[94],"\u002Fnews\u002Fbfmtv",{"title":1457,"description":1457},{"loc":1480},"9.news\u002F944.BFMTV","F2xlZrHg-lOWCsFl4IVT2Kn7JOokBI_3oWMjtn7qMmU",{"id":1486,"title":1487,"author":1488,"body":1491,"description":23,"extension":27,"meta":1505,"navigation":35,"path":1512,"seo":1513,"sitemap":1514,"stem":1515,"__hash__":1516},"content\u002F9.news\u002Fai-should-think-like-the-human-brain-dragon-hatchling-bdh-copies-neurons-for-unlimited-context-and-higher-efficiency.md","AI should think like the human brain: Dragon Hatchling (BDH) copies neurons for unlimited context and higher efficiency",{"name":1489,"favicon":1490,"website":1490},"Notebook 
Check","notebookcheck.com",{"type":13,"value":1492,"toc":1503},[1493,1495,1501],[68,1494,71],{"id":70},[73,1496,75,1497,82],{},[77,1498,1499],{"href":1499,"rel":1500},"https:\u002F\u002Fwww.notebookcheck.com\u002FKI-soll-denken-wie-das-menschliche-Gehirn-Dragon-Hatchling-BDH-kopiert-Neuronen-fuer-unbegrenzten-Kontext-und-hoehere-Effizienz.1142453.0.html",[81],[84,1502],{"url":1499},{"title":23,"searchDepth":24,"depth":24,"links":1504},[],{"redirection":35,"lang":1506,"tags":1507,"date":1509,"related":34,"thumbnail":1510},"german",[94,1508],"bdh","2025-10-21",{"src":1511,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fnotebook-check-th.png","\u002Fnews\u002Fai-should-think-like-the-human-brain-dragon-hatchling-bdh-copies-neurons-for-unlimited-context-and-higher-efficiency",{"title":1487,"description":23},{"loc":1512},"9.news\u002Fai-should-think-like-the-human-brain-dragon-hatchling-bdh-copies-neurons-for-unlimited-context-and-higher-efficiency","E78jyY34D64jUzRWCgPEw8BIkMSTAf8H0-b_pQya1pw",{"id":1518,"title":1519,"author":1520,"body":1524,"description":1538,"extension":27,"meta":1539,"navigation":35,"path":1544,"seo":1545,"sitemap":1546,"stem":1547,"__hash__":1548},"content\u002F9.news\u002Fan-ai-startup-looks-toward-the-post-transformer-era.md","Pathway Looks Toward the Post-Transformer Era",{"name":1521,"img":1522,"provider":11,"website":1523},"Wall Street Journal","\u002Fassets\u002Fblog\u002Favatars\u002Fwsj-th.png","https:\u002F\u002Fwww.wsj.com\u002F",{"type":13,"value":1525,"toc":1536},[1526,1528,1534],[68,1527,71],{"id":70},[73,1529,75,1530,82],{},[77,1531,1532],{"href":1532,"rel":1533},"https:\u002F\u002Fwww.wsj.com\u002Farticles\u002Fan-ai-startup-looks-toward-the-post-transformer-era-4e362db8",[81],[84,1535],{"url":1532},{"title":23,"searchDepth":24,"depth":24,"links":1537},[],"The architecture underlying large language models revolutionized AI. 
Pathway’s Dragon Hatchling is designed to do more",{"layout":90,"redirection":35,"thumbnail":1540,"tags":1542,"date":1543,"pinned":35},{"src":1541},"https:\u002F\u002Fimages.wsj.net\u002Fim-92775332\u002Fsocial",[94,1508],"2025-12-01","\u002Fnews\u002Fan-ai-startup-looks-toward-the-post-transformer-era",{"title":1519,"description":1538},{"loc":1544},"9.news\u002Fan-ai-startup-looks-toward-the-post-transformer-era","fglFx7Sg1bAkyqpxIht9N7qebCqX7fJIkCYiSMKWpBM",{"id":1550,"title":1551,"author":1552,"body":1555,"description":1569,"extension":27,"meta":1570,"navigation":35,"path":1576,"seo":1577,"sitemap":1578,"stem":1579,"__hash__":1580},"content\u002F9.news\u002Farxiv-bdh.md","The Dragon Hatchling: The Missing Link between the Transformer and Models of the Brain",{"name":1553,"favicon":1554,"website":1554},"Arxiv.org","arxiv.org",{"type":13,"value":1556,"toc":1567},[1557,1559,1565],[68,1558,71],{"id":70},[73,1560,75,1561,82],{},[77,1562,1563],{"href":1563,"rel":1564},"https:\u002F\u002Farxiv.org\u002Fabs\u002F2509.26507",[81],[84,1566],{"url":1563},{"title":23,"searchDepth":24,"depth":24,"links":1568},[],"The relationship between computing systems and the brain has served as motivation for pioneering theoreticians since John von Neumann and Alan Turing. 
Uniform, scale-free biological networks, such as the brain, have powerful properties, including generalizing over time, which is the main barrier for Machine Learning on the path to Universal Reasoning Models.",{"redirection":35,"thumbnail":1571,"tags":1573,"date":1575,"pinnedFor":1574,"related":34},{"src":1572,"provider":11,"contain":35},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fpathway-card.png",[1508,1574],"research","2025-11-30","\u002Fnews\u002Farxiv-bdh",{"title":1551,"description":1569},{"loc":1576},"9.news\u002Farxiv-bdh","np-1o-WhTxjr6jpP7u9lmswKMhf1XiYqR8fk2M1z7ms",{"id":1582,"title":1583,"author":1584,"body":1587,"description":1601,"extension":27,"meta":1602,"navigation":35,"path":1607,"seo":1608,"sitemap":1609,"stem":1610,"__hash__":1611},"content\u002F9.news\u002Faws-reinvent-2025-the-new-ai-architecture-that-adapts-and-thinks-just-like-humans.md","AWS re:Invent 2025 -The new AI architecture that adapts and thinks just like humans",{"name":1585,"img":1586,"provider":11},"AWS Events","\u002Fassets\u002Fblog\u002Favatars\u002Faws-av.png",{"type":13,"value":1588,"toc":1599},[1589,1591,1597],[68,1590,71],{"id":70},[73,1592,75,1593,82],{},[77,1594,1595],{"href":1595,"rel":1596},"https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=cnUSW0pLFVk",[81],[84,1598],{"url":1595},{"title":23,"searchDepth":24,"depth":24,"links":1600},[],"This session introduces Baby Dragon Hatchling [BDH], a post-Transformer architecture designed to solve the core barrier faced by today’s AI: its inability to generalize over 
time",{"redirection":35,"thumbnail":1603,"tags":1605,"date":1606},{"src":1604},"https:\u002F\u002Fimg.youtube.com\u002Fvi\u002FcnUSW0pLFVk\u002Fmaxresdefault.jpg",[94,1508,237],"2025-12-04","\u002Fnews\u002Faws-reinvent-2025-the-new-ai-architecture-that-adapts-and-thinks-just-like-humans",{"title":1583,"description":1601},{"loc":1607},"9.news\u002Faws-reinvent-2025-the-new-ai-architecture-that-adapts-and-thinks-just-like-humans","pj1U1xUgGOOG-sGzZuJbk0sbvx6mrV-lANeRewrBiIk",{"id":1613,"title":1614,"author":1615,"body":1616,"description":23,"extension":27,"meta":1628,"navigation":35,"path":1631,"seo":1632,"sitemap":1633,"stem":1634,"__hash__":1635},"content\u002F9.news\u002Fbenchmarks.md","Benchmarks: Fundamental Unlocks for AI",{"id":7,"url":8,"name":9,"img":10,"provider":11},{"type":13,"value":1617,"toc":1626},[1618,1620,1623],[68,1619,71],{"id":70},[73,1621,1622],{},"You will be taken to pathway.com\u002F#benchmarks in a moment.",[84,1624],{"url":1625},"\u002F#benchmarks",{"title":23,"searchDepth":24,"depth":24,"links":1627},[],{"redirection":35,"redirect":1625,"thumbnail":1629,"tags":1630,"date":1575,"related":34},{"src":1572,"provider":11,"contain":35},[1574],"\u002Fnews\u002Fbenchmarks",{"title":1614,"description":23},{"loc":1631},"9.news\u002Fbenchmarks","ImKvj0jlTuh5GeQueGsnH_fXisCVrX6iywnX6nSB0NI",{"id":1637,"title":1638,"author":1639,"body":1640,"description":88,"extension":27,"meta":1654,"navigation":35,"path":1659,"seo":1660,"sitemap":1661,"stem":1662,"__hash__":1663},"content\u002F9.news\u002Fcan-ai-learn-and-evolve-like-a-brain-pathways-bold-research-thinks-so.md","Can AI Learn And Evolve Like A Brain? 
Pathway’s Bold Research Thinks So",{"name":280,"img":282,"provider":11,"website":283},{"type":13,"value":1641,"toc":1652},[1642,1644,1650],[68,1643,71],{"id":70},[73,1645,75,1646,82],{},[77,1647,1648],{"href":1648,"rel":1649},"https:\u002F\u002Fwww.forbes.com\u002Fsites\u002Fvictordey\u002F2025\u002F10\u002F08\u002Fcan-ai-learn-and-evolve-like-a-brain-pathways-bold-research-thinks-so\u002F",[81],[84,1651],{"url":1648},{"title":23,"searchDepth":24,"depth":24,"links":1653},[],{"layout":90,"redirection":35,"thumbnail":1655,"tags":1657,"date":1658,"pinned":35},{"src":1656},"https:\u002F\u002Fimageio.forbes.com\u002Fspecials-images\u002Fimageserve\u002F68e69cf3c94f1ee9ed00f2d3\u002F0x0.jpg?format=jpg&amp;height=900&amp;width=1600&amp;fit=bounds",[94,1508],"2025-10-08","\u002Fnews\u002Fcan-ai-learn-and-evolve-like-a-brain-pathways-bold-research-thinks-so",{"title":1638,"description":88},{"loc":1659},"9.news\u002Fcan-ai-learn-and-evolve-like-a-brain-pathways-bold-research-thinks-so","0uC4pPb_VbWHp4Dq3jWS7iAfsy4dkV9VPIsWk25sFqw",{"id":1665,"title":1666,"author":1667,"body":1671,"description":1685,"extension":27,"meta":1686,"navigation":35,"path":1691,"seo":1692,"sitemap":1693,"stem":1694,"__hash__":1695},"content\u002F9.news\u002Fembracing-modern-live-data-pipelines-is-key-to-scaling-enterprise-ai.md","Embracing Modern Live Data Pipelines is Key to Scaling Enterprise AI",{"name":1668,"description":23,"linkedin":1669,"img":1670,"provider":11},"RTInsights","https:\u002F\u002Fwww.rtinsights.com\u002Fauthor\u002Fthree-amigos\u002F","\u002Fassets\u002Fblog\u002Favatars\u002Frtinsights-av.png",{"type":13,"value":1672,"toc":1683},[1673,1675,1681],[68,1674,71],{"id":70},[73,1676,75,1677,82],{},[77,1678,1679],{"href":1679,"rel":1680},"https:\u002F\u002Fwww.rtinsights.com\u002Fembracing-modern-live-data-pipelines-is-key-to-scaling-enterprise-ai\u002F",[81],[84,1682],{"url":1679},{"title":23,"searchDepth":24,"depth":24,"links":1684},[],"Scaling LLM deployments from pilot to 
production requires and emphasis on adopting real-time, adaptive data frameworks",{"layout":90,"redirection":35,"thumbnail":1687,"tags":1689,"date":1690,"hidden":35},{"src":1688},"https:\u002F\u002Fwww.rtinsights.com\u002Fwp-content\u002Fuploads\u002F2025\u002F03\u002FDepositphotos_539418084_S-800x534.jpg",[94],"2025-03-19","\u002Fnews\u002Fembracing-modern-live-data-pipelines-is-key-to-scaling-enterprise-ai",{"title":1666,"description":1685},{"loc":1691},"9.news\u002Fembracing-modern-live-data-pipelines-is-key-to-scaling-enterprise-ai","OZmaFp48K4mGreSB23tDG2i1thQNT_QQQo9aZaX311M",{"id":1697,"title":1698,"author":1699,"body":1701,"description":23,"extension":27,"meta":1715,"navigation":35,"path":1721,"seo":1722,"sitemap":1723,"stem":1724,"__hash__":1725},"content\u002F9.news\u002Fforbes-poland-ceo-profile-in-polish.md","Forbes Poland: CEO profile (in Polish)",{"name":280,"img":282,"provider":11,"website":1700},"https:\u002F\u002Fwww.forbes.pl\u002F",{"type":13,"value":1702,"toc":1713},[1703,1705,1711],[68,1704,71],{"id":70},[73,1706,75,1707,82],{},[77,1708,1709],{"href":1709,"rel":1710},"https:\u002F\u002Fwww.forbes.pl\u002Fpolka-chce-wstrzasnac-dolina-krzemowa-z-jej-produktu-korzystaja-juz-intel-i-nato\u002F7bmkk92",[81],[84,1712],{"url":1709},{"title":23,"searchDepth":24,"depth":24,"links":1714},[],{"redirection":35,"thumbnail":1716,"tags":1718,"date":1719,"related":34,"lang":1720},{"src":1717},"https:\u002F\u002Focdn.eu\u002Fpulscms-transforms\u002F1\u002FrVgk9kpTURBXy9iZDgwYjVkZGVmM2E3OGZhMTIxMzdhZTE0MjUzZmQ1MS5qcGeSlQPNAXoAzQXpzQNUkwXNA47NAl_eAAGhMAU",[94],"2025-03-20","polish","\u002Fnews\u002Fforbes-poland-ceo-profile-in-polish",{"title":1698,"description":23},{"loc":1721},"9.news\u002Fforbes-poland-ceo-profile-in-polish","ElXrU-Hsh0hNlficbG1cUyv9NRYA_tB9A8V-WEuMuEM",{"id":1727,"title":1728,"author":1729,"body":1730,"description":23,"extension":27,"meta":1744,"navigation":35,"path":1749,"seo":1750,"sitemap":1751,"stem":1752,"__hash__":1753},"content\u002F9.n
ews\u002Ffrom-data-sure-to-ai-savvy-unlocking-the-next-stage-of-business-transformation.md","From Data-sure To AI-savvy: Unlocking The Next Stage Of Business Transformation",{"id":105,"url":106,"name":107,"description":108,"img":109,"provider":11,"linkedin":110},{"type":13,"value":1731,"toc":1742},[1732,1734,1740],[68,1733,71],{"id":70},[73,1735,75,1736,82],{},[77,1737,1738],{"href":1738,"rel":1739},"https:\u002F\u002Fwww.techdogs.com\u002Finspire\u002Fc-suite-scoops\u002Ffrom-data-sure-to-ai-savvy-unlocking-the-next-stage-of-business-transformation",[81],[84,1741],{"url":1738},{"title":23,"searchDepth":24,"depth":24,"links":1743},[],{"redirection":35,"thumbnail":1745,"tags":1747,"date":1748,"related":34,"hidden":35},{"src":1746,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Ffrom-data-sure-to-ai-savvy-unlocking-the-next-stage-of-business-transformation-th.png",[94],"2025-04-17","\u002Fnews\u002Ffrom-data-sure-to-ai-savvy-unlocking-the-next-stage-of-business-transformation",{"title":1728,"description":23},{"loc":1749},"9.news\u002Ffrom-data-sure-to-ai-savvy-unlocking-the-next-stage-of-business-transformation","Oxm2s0yVWPvn15BMl7KGddOK6mSv6UgexLH-WYn1fyI",{"id":1755,"title":1756,"author":1757,"body":1760,"description":1774,"extension":27,"meta":1775,"navigation":35,"path":1780,"seo":1781,"sitemap":1782,"stem":1783,"__hash__":1784},"content\u002F9.news\u002Fhow-neolabs-are-betting-against-the-openai-model-and-what-it-means-for-founders.md","How 'Neolabs' Are Betting Against the OpenAI Model and What It Means for 
Founders",{"name":1758,"img":1759,"provider":11},"Inc.","\u002Fassets\u002Fblog\u002Favatars\u002Finc-av.png",{"type":13,"value":1761,"toc":1772},[1762,1764,1770],[68,1763,71],{"id":70},[73,1765,75,1766,82],{},[77,1767,1768],{"href":1768,"rel":1769},"https:\u002F\u002Fwww.inc.com\u002Fbrett-farmiloe\u002Fhow-neolabs-are-betting-against-the-openai-model-and-what-it-means-for-founders\u002F91279024",[81],[84,1771],{"url":1768},{"title":23,"searchDepth":24,"depth":24,"links":1773},[],"AI is moving faster and becoming more diverse than ever. The next competitive advantage may come from a new architecture.",{"redirection":35,"thumbnail":1776,"tags":1778,"date":1779,"related":34},{"src":1777},"https:\u002F\u002Fimg-cdn.inc.com\u002Fimage\u002Fupload\u002Ff_webp,q_auto,c_fit,w_1024\u002Fvip\u002F2025\u002F12\u002Fneolabs-ai-models-new-inc.jpg",[94,1508],"2025-12-21","\u002Fnews\u002Fhow-neolabs-are-betting-against-the-openai-model-and-what-it-means-for-founders",{"title":1756,"description":1774},{"loc":1780},"9.news\u002Fhow-neolabs-are-betting-against-the-openai-model-and-what-it-means-for-founders","xFTp6GdYzTVECOLC2H98vulR2Qk4dBbVF0cO3y_N2V0",{"id":1786,"title":1787,"author":1788,"body":1791,"description":1805,"extension":27,"meta":1806,"navigation":35,"path":1811,"seo":1812,"sitemap":1813,"stem":1814,"__hash__":1815},"content\u002F9.news\u002Finside-pathways-post-transformer-architecture-designed-for-memory-and-on-the-fly-learning.md","Inside Pathway's Post-Transformer Architecture Designed for Memory and On-the-Fly Learning",{"name":1789,"img":1790},"Eye on 
AI","https:\u002F\u002Fyt3.ggpht.com\u002Fytc\u002FAIdro_mjddd9v-_8K0iqLY0aO7UmCi0yPYKQe-QP48kcTeViIQ=s48-c-k-c0x00ffffff-no-rj",{"type":13,"value":1792,"toc":1803},[1793,1795,1801],[68,1794,71],{"id":70},[73,1796,75,1797,82],{},[77,1798,1799],{"href":1799,"rel":1800},"https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=E6WmXnEFDgc",[81],[84,1802],{"url":1799},{"title":23,"searchDepth":24,"depth":24,"links":1804},[],"This episode dives into why Pathway’s Baby Dragon Hatchling (BDH) might mark the beginning of the post-transformer era in AI",{"redirection":35,"thumbnail":1807,"tags":1809,"date":1810,"related":34},{"src":1808},"https:\u002F\u002Fimg.youtube.com\u002Fvi\u002FE6WmXnEFDgc\u002Fmaxresdefault.jpg",[94,1508],"2026-03-11","\u002Fnews\u002Finside-pathways-post-transformer-architecture-designed-for-memory-and-on-the-fly-learning",{"title":1787,"description":1805},{"loc":1811},"9.news\u002Finside-pathways-post-transformer-architecture-designed-for-memory-and-on-the-fly-learning","u2jUEoLA6AzenMeIQEr2fmBxonoWrEZ9m7K3cYEmnXE",{"id":1817,"title":1818,"author":1819,"body":1821,"description":23,"extension":27,"meta":1835,"navigation":35,"path":1841,"seo":1842,"sitemap":1843,"stem":1844,"__hash__":1845},"content\u002F9.news\u002Finteligencia-artificial-aprender-cerebro-humano.md","Can an artificial intelligence learn like a human brain does? 
A startup believes it has achieved this",{"name":1820,"img":282,"provider":11,"website":283},"Forbes Argentina",{"type":13,"value":1822,"toc":1833},[1823,1825,1831],[68,1824,71],{"id":70},[73,1826,75,1827,82],{},[77,1828,1829],{"href":1829,"rel":1830},"https:\u002F\u002Fes-us.noticias.yahoo.com\u002Finteligencia-artificial-aprender-cerebro-humano-231500859.html",[81],[84,1832],{"url":1829},{"title":23,"searchDepth":24,"depth":24,"links":1834},[],{"layout":90,"redirection":35,"tags":1836,"date":1837,"lang":1838,"thumbnail":1839},[94,1508],"2025-09-09","spanish",{"src":1840,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Finteligencia-artificial-aprender-cerebro-humano-th.png","\u002Fnews\u002Finteligencia-artificial-aprender-cerebro-humano",{"title":1818,"description":23},{"loc":1841},"9.news\u002Finteligencia-artificial-aprender-cerebro-humano","UPFIaoXonc99vw41WnfvRims_9VXO8-CuD8ZutNizQM",{"id":1847,"title":1848,"author":1849,"body":1852,"description":1866,"extension":27,"meta":1867,"navigation":35,"path":1872,"seo":1873,"sitemap":1874,"stem":1875,"__hash__":1876},"content\u002F9.news\u002Fla-poste-partners-with-pathway-to-create-digital-twin-of-fleet.md","La Poste partners with Pathway to create digital twin of fleet",{"name":1850,"img":1851,"provider":11},"IoT Insider","\u002Fassets\u002Fblog\u002Favatars\u002Fiot-insider-av.png",{"type":13,"value":1853,"toc":1864},[1854,1856,1862],[68,1855,71],{"id":70},[73,1857,75,1858,82],{},[77,1859,1860],{"href":1860,"rel":1861},"https:\u002F\u002Fwww.iotinsider.com\u002Fnews\u002Fla-poste-partners-with-pathway-to-create-digital-twin-of-fleet\u002F",[81],[84,1863],{"url":1860},{"title":23,"searchDepth":24,"depth":24,"links":1865},[],"French postal service company, La Poste, has partnered with Pathway, to create a digital twin of the La Poste logistics 
fleet.",{"redirection":35,"thumbnail":1868,"tags":1870,"date":1871,"related":34,"hidden":35},{"src":1869},"https:\u002F\u002Fwww.iotinsider.com\u002Fwp-content\u002Fuploads\u002F2025\u002F06\u002FPathway-and-La-Poste-770x433.png",[94],"2025-06-22","\u002Fnews\u002Fla-poste-partners-with-pathway-to-create-digital-twin-of-fleet",{"title":1848,"description":1866},{"loc":1872},"9.news\u002Fla-poste-partners-with-pathway-to-create-digital-twin-of-fleet","uONdBHKd1rACi_xonfCcrhIRUuRRQUe0kzRd1J5Ez7Y",{"id":1878,"title":1879,"author":1880,"body":1883,"description":1569,"extension":27,"meta":1897,"navigation":35,"path":1903,"seo":1904,"sitemap":1905,"stem":1906,"__hash__":1907},"content\u002F9.news\u002Fmila-bdh.md","BDH: The Missing Link between the Transformer and Models of the Brain",{"name":1881,"favicon":1882,"website":1882},"MILA Tea Talk","mila.quebec",{"type":13,"value":1884,"toc":1895},[1885,1887,1893],[68,1886,71],{"id":70},[73,1888,75,1889,82],{},[77,1890,1891],{"href":1891,"rel":1892},"https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=aCc5f16WDIg",[81],[84,1894],{"url":1891},{"title":23,"searchDepth":24,"depth":24,"links":1896},[],{"redirection":35,"thumbnail":1898,"tags":1900,"date":1902,"related":34},{"src":1899,"contain":35},"https:\u002F\u002Fimg.youtube.com\u002Fvi\u002FaCc5f16WDIg\u002Fmaxresdefault.jpg",[1901,237,1508,1574],"podcast","2026-03-10","\u002Fnews\u002Fmila-bdh",{"title":1879,"description":1569},{"loc":1903},"9.news\u002Fmila-bdh","WEVF1pEidtvndgWxIyyU_-U2aT2bxWdBMkCEM7NxP4c",{"id":1909,"title":1910,"author":1911,"body":1914,"description":1928,"extension":27,"meta":1929,"navigation":35,"path":1934,"seo":1935,"sitemap":1936,"stem":1937,"__hash__":1938},"content\u002F9.news\u002Fnew-ai-research-claims-to-be-getting-closer-to-modeling-human-brain.md","New AI research claims to be getting closer to modeling human 
brain",{"name":1912,"favicon":1913,"website":1913},"Semafor","semafor.com",{"type":13,"value":1915,"toc":1926},[1916,1918,1924],[68,1917,71],{"id":70},[73,1919,75,1920,82],{},[77,1921,1922],{"href":1922,"rel":1923},"https:\u002F\u002Fwww.semafor.com\u002Farticle\u002F10\u002F01\u002F2025\u002Fnew-ai-research-claims-to-be-getting-closer-to-modeling-human-brain",[81],[84,1925],{"url":1922},{"title":23,"searchDepth":24,"depth":24,"links":1927},[],"A new paper published by startup Pathway says it can more closely replicate connections between neurons than current LLMs",{"redirection":35,"thumbnail":1930,"tags":1932,"date":1933,"related":34},{"src":1931,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fnew-ai-research-claims-to-be-getting-closer-to-modeling-human-brain-th.png",[94,1508],"2025-10-01","\u002Fnews\u002Fnew-ai-research-claims-to-be-getting-closer-to-modeling-human-brain",{"title":1910,"description":1928},{"loc":1934},"9.news\u002Fnew-ai-research-claims-to-be-getting-closer-to-modeling-human-brain","5W-pQWKhHD-8DxCgb7yIh_YxO1rdrux4ZtyJLwIxgIk",{"id":1940,"title":1941,"author":1942,"body":1945,"description":1959,"extension":27,"meta":1960,"navigation":35,"path":1965,"seo":1966,"sitemap":1967,"stem":1968,"__hash__":1969},"content\u002F9.news\u002Fnew-dragon-hatchling-ai-architecture-modeled-after-the-human-brain-could-be-a-key-step-toward-agi-researchers-claim.md","New 'Dragon Hatchling' AI architecture modeled after the human brain could be a key step toward AGI, researchers claim",{"name":1943,"favicon":1944,"website":1944},"Live 
Science","livescience.com",{"type":13,"value":1946,"toc":1957},[1947,1949,1955],[68,1948,71],{"id":70},[73,1950,75,1951,82],{},[77,1952,1953],{"href":1953,"rel":1954},"https:\u002F\u002Fwww.livescience.com\u002Ftechnology\u002Fartificial-intelligence\u002Fnew-dragon-hatchling-ai-architecture-modeled-after-the-human-brain-could-be-a-key-step-toward-agi-researchers-claim",[81],[84,1956],{"url":1953},{"title":23,"searchDepth":24,"depth":24,"links":1958},[],"Scientists say a new kind of AI could bridge the gap between current systems and machines that learn and think more like us.",{"redirection":35,"thumbnail":1961,"tags":1963,"date":1964,"related":34},{"src":1962},"https:\u002F\u002Fcdn.mos.cms.futurecdn.net\u002FtxftSjJw9qMtxWy85qzWFY-650-80.png.webp",[94,1508],"2025-11-13","\u002Fnews\u002Fnew-dragon-hatchling-ai-architecture-modeled-after-the-human-brain-could-be-a-key-step-toward-agi-researchers-claim",{"title":1941,"description":1959},{"loc":1965},"9.news\u002Fnew-dragon-hatchling-ai-architecture-modeled-after-the-human-brain-could-be-a-key-step-toward-agi-researchers-claim","8jGm5GWJ04de_gyDTYNKXLjZoOcwFgC_rLOIcioFStI",{"id":1971,"title":1972,"author":1973,"body":1974,"description":1988,"extension":27,"meta":1989,"navigation":35,"path":1994,"seo":1995,"sitemap":1996,"stem":1997,"__hash__":1998},"content\u002F9.news\u002Fnew-podcast-europes-ai-opportunity-brand.md","New podcast: Europe’s AI opportunity",{"name":62,"img":1429},{"type":13,"value":1975,"toc":1986},[1976,1978,1984],[68,1977,71],{"id":70},[73,1979,75,1980,82],{},[77,1981,1982],{"href":1982,"rel":1983},"https:\u002F\u002Fsifted.eu\u002Farticles\u002Fnew-podcast-europes-ai-opportunity-brnd",[81],[84,1985],{"url":1982},{"title":23,"searchDepth":24,"depth":24,"links":1987},[],"Europe’s AI Opportunity is a podcast mini-series, in collaboration with Nebius, to explore the state of play for the continent’s AI 
startups",{"layout":90,"redirection":35,"date":1990,"thumbnail":1991,"tags":1993,"hidden":35},"2025-07-09",{"src":1992,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fnew-podcast-europes-ai-opportunity-brand-th.png",[94,1901],"\u002Fnews\u002Fnew-podcast-europes-ai-opportunity-brand",{"title":1972,"description":1988},{"loc":1994},"9.news\u002Fnew-podcast-europes-ai-opportunity-brand","9skf0StJJmAOF_VlfwQU4PxvOVkY9-xsWxxyHL6DnTc",{"id":2000,"title":2001,"author":2002,"body":2003,"description":2011,"extension":27,"meta":2012,"navigation":35,"path":2016,"seo":2017,"sitemap":2018,"stem":2019,"__hash__":2020},"content\u002F9.news\u002Fnewsletter-2025-02-25.md","Pathway to the Silicon Valley",{"id":7,"url":8,"name":9,"img":10,"provider":11},{"type":13,"value":2004,"toc":2009},[2005],[16,2006],{"src":2007,"width":19,"height":19,"className":2008},"https:\u002F\u002Fmailchi.mp\u002Fpathway\u002Fnewsletterpaloalto",[21],{"title":23,"searchDepth":24,"depth":24,"links":2010},[],"New Silicon Valley Office: Let’s Talk 
AI",{"thumbnail":2013,"date":2014,"tags":2015,"aside":34},{"src":30,"provider":11},"2025-02-25",[33],"\u002Fnews\u002Fnewsletter-2025-02-25",{"title":2001,"description":2011},{"loc":2016},"9.news\u002Fnewsletter-2025-02-25","Ii3OAF-NcT_M_llE1hW-qYUwTumyfEWppimSMh3solM",{"id":4,"title":5,"author":2022,"body":2023,"description":26,"extension":27,"meta":2030,"navigation":35,"path":36,"seo":2033,"sitemap":2034,"stem":39,"__hash__":40},{"id":7,"url":8,"name":9,"img":10,"provider":11},{"type":13,"value":2024,"toc":2028},[2025],[16,2026],{"src":18,"width":19,"height":19,"className":2027},[21],{"title":23,"searchDepth":24,"depth":24,"links":2029},[],{"thumbnail":2031,"date":31,"tags":2032,"aside":34},{"src":30,"provider":11},[33],{"title":5,"description":26},{"loc":36},{"id":2036,"title":2037,"author":2038,"body":2039,"description":2047,"extension":27,"meta":2048,"navigation":35,"path":2052,"seo":2053,"sitemap":2054,"stem":2055,"__hash__":2056},"content\u002F9.news\u002Fnewsletter-2026-01-15.md","WSJ: Pathway marks the beginning of the post-transformer era",{"id":7,"url":8,"name":9,"img":10,"provider":11},{"type":13,"value":2040,"toc":2045},[2041],[16,2042],{"src":2043,"width":19,"height":19,"className":2044},"https:\u002F\u002Fmailchi.mp\u002Fpathway\u002Fthe-next-transformer-moment-for-ai-17996970",[21],{"title":23,"searchDepth":24,"depth":24,"links":2046},[],"Thank you for your support, trust and partnership in building this next foundation together",{"thumbnail":2049,"date":2050,"tags":2051,"aside":34},{"src":30,"provider":11},"2026-01-15",[33],"\u002Fnews\u002Fnewsletter-2026-01-15",{"title":2037,"description":2047},{"loc":2052},"9.news\u002Fnewsletter-2026-01-15","CLHeLEpdMc70hqWtNMW-5AlIelPTJycCwWQOh-Sp8hY",{"id":2058,"title":2059,"author":2060,"body":2063,"description":23,"extension":27,"meta":2077,"navigation":35,"path":2082,"seo":2083,"sitemap":2084,"stem":2085,"__hash__":2086},"content\u002F9.news\u002Fopen-ai-coding-jobs-silicon-valley-google.md","OpenAI claims 
AI is making coding jobs better, not worse. Is it true?",{"name":2061,"favicon":2062,"website":2062},"Fast Company","fastcompany.com",{"type":13,"value":2064,"toc":2075},[2065,2067,2073],[68,2066,71],{"id":70},[73,2068,75,2069,82],{},[77,2070,2071],{"href":2071,"rel":2072},"https:\u002F\u002Fwww.fastcompany.com\u002F91411004\u002Fopen-ai-coding-jobs-silicon-valley-google",[81],[84,2074],{"url":2071},{"title":23,"searchDepth":24,"depth":24,"links":2076},[],{"layout":90,"redirection":35,"tags":2078,"date":2079,"thumbnail":2080},[94],"2025-09-26",{"src":2081,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Ffastcompany-th.png","\u002Fnews\u002Fopen-ai-coding-jobs-silicon-valley-google",{"title":2059,"description":23},{"loc":2082},"9.news\u002Fopen-ai-coding-jobs-silicon-valley-google","0tUzqMv2fLoiiJeiSzxe4-gj2OrMOOG0TZGZL8tsXYo",{"id":2088,"title":2089,"author":2090,"body":2093,"description":2107,"extension":27,"meta":2108,"navigation":35,"path":2113,"seo":2114,"sitemap":2115,"stem":2116,"__hash__":2117},"content\u002F9.news\u002Fopinion-eu-could-be-epicenter-of-ai-academia-as-us-cuts-funding.md","Opinion: EU could be epicenter of AI academia as US cuts funding",{"name":2091,"img":2092,"provider":11},"SiliconRepublic.com","\u002Fassets\u002Fblog\u002Favatars\u002Fsiliconrepublic-av.png",{"type":13,"value":2094,"toc":2105},[2095,2097,2103],[68,2096,71],{"id":70},[73,2098,75,2099,82],{},[77,2100,2101],{"href":2101,"rel":2102},"https:\u002F\u002Fwww.siliconrepublic.com\u002Finnovation\u002Fopinion-eu-could-be-epicentre-of-ai-academia-as-us-cuts-funding",[81],[84,2104],{"url":2101},{"title":23,"searchDepth":24,"depth":24,"links":2106},[],"EU academic research could be the big winner as US cuts funding to its research 
bodies",{"redirection":35,"thumbnail":2109,"tags":2111,"date":2112,"related":34,"hidden":35},{"src":2110},"https:\u002F\u002Fwww.siliconrepublic.com\u002Fwp-content\u002Fuploads\u002F2025\u002F05\u002FZuzanna-Stamirowska.jpg",[94],"2025-05-16","\u002Fnews\u002Fopinion-eu-could-be-epicenter-of-ai-academia-as-us-cuts-funding",{"title":2089,"description":2107},{"loc":2113},"9.news\u002Fopinion-eu-could-be-epicenter-of-ai-academia-as-us-cuts-funding","ELouOZ5nJLGrugUxdHjzWo9zXcc5GO1Zo87QvJTQ3lw",{"id":2119,"title":2120,"author":2121,"body":2124,"description":2138,"extension":27,"meta":2139,"navigation":35,"path":2144,"seo":2145,"sitemap":2146,"stem":2147,"__hash__":2148},"content\u002F9.news\u002Fpalo-alto-ai-firm-pathway-unveils-post-transformer-architecture-for-autonomous-ai.md","Palo Alto AI Firm Pathway Unveils Post-Transformer Architecture for Autonomous AI",{"name":2122,"favicon":2123,"website":2123},"Quantum Zeitgeist","quantumzeitgeist.com",{"type":13,"value":2125,"toc":2136},[2126,2128,2134],[68,2127,71],{"id":70},[73,2129,75,2130,82],{},[77,2131,2132],{"href":2132,"rel":2133},"https:\u002F\u002Fquantumzeitgeist.com\u002Fpalo-alto-ai-firm-pathway-unveils-post-transformer-architecture-for-autonomous-ai\u002F",[81],[84,2135],{"url":2132},{"title":23,"searchDepth":24,"depth":24,"links":2137},[],"Artificial intelligence has long been celebrated for its uncanny ability to recognize patterns in vast datasets, yet it remains shackled by a static view of the 
world...",{"redirection":35,"thumbnail":2140,"tags":2142,"date":2143,"related":34},{"src":2141},"https:\u002F\u002Fquantumzeitgeist.com\u002Fwp-content\u002Fuploads\u002FPathway_Image.gif",[94,1508],"2025-08-03","\u002Fnews\u002Fpalo-alto-ai-firm-pathway-unveils-post-transformer-architecture-for-autonomous-ai",{"title":2120,"description":2138},{"loc":2144},"9.news\u002Fpalo-alto-ai-firm-pathway-unveils-post-transformer-architecture-for-autonomous-ai","WQeDlBSH3cB5p2RNG9D5Vy2VY9UeKCzeaY_PSlbKe48",{"id":2150,"title":2151,"author":2152,"body":2155,"description":23,"extension":27,"meta":2169,"navigation":35,"path":2174,"seo":2175,"sitemap":2176,"stem":2177,"__hash__":2178},"content\u002F9.news\u002Fpathway-bdh-brain-inspired-ai-architecture.md","Brain-inspired AI model 'BDH' may surpass the limits of Transformers",{"name":2153,"favicon":2154,"website":2154},"Radical Data Science","xenospectrum.com",{"type":13,"value":2156,"toc":2167},[2157,2159,2165],[68,2158,71],{"id":70},[73,2160,75,2161,82],{},[77,2162,2163],{"href":2163,"rel":2164},"https:\u002F\u002Fxenospectrum.com\u002Fpathway-bdh-brain-inspired-ai-architecture\u002F",[81],[84,2166],{"url":2163},{"title":23,"searchDepth":24,"depth":24,"links":2168},[],{"redirection":35,"lang":2170,"tags":2171,"date":1933,"related":34,"thumbnail":2172},"japanese",[94,1508],{"src":2173,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fbdh-brain-th.png","\u002Fnews\u002Fpathway-bdh-brain-inspired-ai-architecture",{"title":2151,"description":23},{"loc":2174},"9.news\u002Fpathway-bdh-brain-inspired-ai-architecture","anhqPMYzT_YDg9QuMSusksSNP0QA3ji7_a-N-I55N2I",{"id":2180,"title":2181,"author":2182,"body":2184,"description":2198,"extension":27,"meta":2199,"navigation":35,"path":2203,"seo":2204,"sitemap":2205,"stem":2206,"__hash__":2207},"content\u002F9.news\u002Fpathway-launches-a-new-post-transformer-architecture-that-paves-the-way-for-autonomous-ai.md","Pathway Launches a New “Post-Transformer” Architecture That Paves the 
Way for Autonomous AI",{"name":2153,"favicon":2183,"website":2183},"radicaldatascience.wordpress.com",{"type":13,"value":2185,"toc":2196},[2186,2188,2194],[68,2187,71],{"id":70},[73,2189,75,2190,82],{},[77,2191,2192],{"href":2192,"rel":2193},"https:\u002F\u002Fradicaldatascience.wordpress.com\u002F2025\u002F10\u002F01\u002Fpathway-launches-a-new-post-transformer-architecture-that-paves-the-way-for-autonomous-ai\u002F",[81],[84,2195],{"url":2192},{"title":23,"searchDepth":24,"depth":24,"links":2197},[],"Pathway, the data company building live AI that thinks in real-time like humans do, is today introducing Baby Dragon Hatchling (BDH), a new “post-Transformer” architecture that addresses one of the most significant barriers to autonomous artificial intelligence (AI): the inability to generalize over time.",{"redirection":35,"tags":2200,"date":1933,"related":34,"thumbnail":2201},[94,1508],{"src":2202,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fradicaldatascience-th.png","\u002Fnews\u002Fpathway-launches-a-new-post-transformer-architecture-that-paves-the-way-for-autonomous-ai",{"title":2181,"description":2198},{"loc":2203},"9.news\u002Fpathway-launches-a-new-post-transformer-architecture-that-paves-the-way-for-autonomous-ai","HGMgNwURx8yPCNcLMG1bpH528qBTicwS-SRIU67sxEE",{"id":2209,"title":2210,"author":2211,"body":2214,"description":23,"extension":27,"meta":2228,"navigation":35,"path":2233,"seo":2234,"sitemap":2235,"stem":2236,"__hash__":2237},"content\u002F9.news\u002Fpathway-launches-new-post-transformer-architecture-paving-the-way-for-autonomous-ai.md","Pathway launches new post-transformer architecture paving the way for autonomous AI",{"name":2212,"favicon":2213,"website":2213},"Intelligent 
CIO","intelligentcio.com",{"type":13,"value":2215,"toc":2226},[2216,2218,2224],[68,2217,71],{"id":70},[73,2219,75,2220,82],{},[77,2221,2222],{"href":2222,"rel":2223},"https:\u002F\u002Fwww.intelligentcio.com\u002Feu\u002F2025\u002F10\u002F03\u002Fpathway-launches-new-post-transformer-architecture-paving-the-way-for-autonomous-ai\u002F",[81],[84,2225],{"url":2222},{"title":23,"searchDepth":24,"depth":24,"links":2227},[],{"layout":90,"redirection":35,"tags":2229,"date":2230,"thumbnail":2231},[94,1508],"2025-10-03",{"src":2232,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fcio-th.png","\u002Fnews\u002Fpathway-launches-new-post-transformer-architecture-paving-the-way-for-autonomous-ai",{"title":2210,"description":23},{"loc":2233},"9.news\u002Fpathway-launches-new-post-transformer-architecture-paving-the-way-for-autonomous-ai","FDhfWdV94SJDDzqsN_sy0nVHuYcu9lLIxbR24fszV4E",{"id":2239,"title":2240,"author":2241,"body":2244,"description":2258,"extension":27,"meta":2259,"navigation":35,"path":2263,"seo":2264,"sitemap":2265,"stem":2266,"__hash__":2267},"content\u002F9.news\u002Fpathway-to-deliver-new-class-of-adaptive-and-continuously-learning-ai-systems-with-aws-and-nvidia-technologies.md","Pathway to Deliver New Class of Adaptive and Continuously Learning AI Systems with AWS and NVIDIA Technologies",{"name":2242,"favicon":2243,"website":2243},"businesswire","businesswire.com",{"type":13,"value":2245,"toc":2256},[2246,2248,2254],[68,2247,71],{"id":70},[73,2249,75,2250,82],{},[77,2251,2252],{"href":2252,"rel":2253},"https:\u002F\u002Fwww.businesswire.com\u002Fnews\u002Fhome\u002F20251201914013\u002Fen\u002FPathway-to-Deliver-New-Class-of-Adaptive-and-Continuously-Learning-AI-Systems-with-AWS-and-NVIDIA-Technologies",[81],[84,2255],{"url":2252},{"title":23,"searchDepth":24,"depth":24,"links":2257},[],"Pathway, the data company building live AI that thinks in real-time like humans do, today announced that its groundbreaking post-Transformer 
BDH",{"redirection":35,"thumbnail":2260,"tags":2262,"date":1543,"related":34},{"src":2261},"https:\u002F\u002Fmms.businesswire.com\u002Fmedia\u002F20251201914013\u002Fen\u002F2654091\u002F22\u002Fpathway-logo-black.jpg",[94,1508],"\u002Fnews\u002Fpathway-to-deliver-new-class-of-adaptive-and-continuously-learning-ai-systems-with-aws-and-nvidia-technologies",{"title":2240,"description":2258},{"loc":2263},"9.news\u002Fpathway-to-deliver-new-class-of-adaptive-and-continuously-learning-ai-systems-with-aws-and-nvidia-technologies","kxT730X_OFRGdaiENT4ckYUwoRKiQndZh9rvLIDbNfg",{"id":2269,"title":2270,"author":2271,"body":2274,"description":23,"extension":27,"meta":2288,"navigation":35,"path":2293,"seo":2294,"sitemap":2295,"stem":2296,"__hash__":2297},"content\u002F9.news\u002Fpathways-bdh-a-new-post-transformer-approach-to-enterprise-ai-on-aws.md","Pathway's BDH: a new post-transformer approach to enterprise AI, on AWS",{"name":2272,"website":2273,"favicon":2273},"AWS Startups","aws.amazon.com",{"type":13,"value":2275,"toc":2286},[2276,2278,2284],[68,2277,71],{"id":70},[73,2279,75,2280,82],{},[77,2281,2282],{"href":2282,"rel":2283},"https:\u002F\u002Faws.amazon.com\u002Fstartups\u002Flearn\u002Fpathways-bdh-a-new-post-transformer-approach-to-enterprise-ai-on-aws#overview",[81],[84,2285],{"url":2282},{"title":23,"searchDepth":24,"depth":24,"links":2287},[],{"redirection":35,"thumbnail":2289,"tags":2291,"date":2292,"related":34},{"src":2290},"https:\u002F\u002Fd22k7geae6sy8h.cloudfront.net\u002Ffiles\u002F69f214d3b5f460000b9fbc6f\u002FZuzanna-Stamirowska-CEO.jpg",[94,1508],"2026-05-03","\u002Fnews\u002Fpathways-bdh-a-new-post-transformer-approach-to-enterprise-ai-on-aws",{"title":2270,"description":23},{"loc":2293},"9.news\u002Fpathways-bdh-a-new-post-transformer-approach-to-enterprise-ai-on-aws","Q9-0JT7nBdpU4dTL17Llq3ha16UpMdPhLxWpLj7tVBk",{"id":2299,"title":2300,"author":2301,"body":2304,"description":23,"extension":27,"meta":2318,"navigation":35,"path":2323,"seo":2324,"s
itemap":2325,"stem":2326,"__hash__":2327},"content\u002F9.news\u002Frevealing-the-first-biological-ai-a-step-closer-to-singularity copy.md","Revealing the First Biological AI: A Step Closer to Singularity",{"name":2302,"img":2303,"provider":11},"This Is The World","\u002Fassets\u002Fblog\u002Favatars\u002Fthisisworld-av.jpg",{"type":13,"value":2305,"toc":2316},[2306,2308,2314],[68,2307,71],{"id":70},[73,2309,75,2310,82],{},[77,2311,2312],{"href":2312,"rel":2313},"https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=6_v2HG8l9oA",[81],[84,2315],{"url":2312},{"title":23,"searchDepth":24,"depth":24,"links":2317},[],{"redirection":35,"thumbnail":2319,"tags":2321,"date":2322,"related":34},{"src":2320},"https:\u002F\u002Fimg.youtube.com\u002Fvi\u002F6_v2HG8l9oA\u002Fmaxresdefault.jpg",[94,1901,1508],"2025-10-04","\u002Fnews\u002Frevealing-the-first-biological-ai-a-step-closer-to-singularity-copy",{"title":2300,"description":23},{"loc":2323},"9.news\u002Frevealing-the-first-biological-ai-a-step-closer-to-singularity copy","Q0UmGNVVvIyL_F4mJw9AyduEZOvCtERvymlXZJKPgi0",{"id":2329,"title":2330,"author":2331,"body":2335,"description":23,"extension":27,"meta":2349,"navigation":35,"path":2354,"seo":2355,"sitemap":2356,"stem":2357,"__hash__":2358},"content\u002F9.news\u002Fsds-929-dragon-hatchling-the-missing-link-between-transformers-and-the-brain-with-adrian-kosowski.md","Dragon Hatchling: The Missing Link Between Transformers and the Brain, with Adrian Kosowski (SDS 
929)",{"name":2332,"img":2333,"provider":11,"website":2334},"SuperDataScience","\u002Fassets\u002Fblog\u002Favatars\u002Fsuperdatascience-av.png","https:\u002F\u002Fsuperdatascience.com",{"type":13,"value":2336,"toc":2347},[2337,2339,2345],[68,2338,71],{"id":70},[73,2340,75,2341,82],{},[77,2342,2343],{"href":2343,"rel":2344},"https:\u002F\u002Fwww.superdatascience.com\u002Fpodcast\u002Fsds-929-dragon-hatchling-the-missing-link-between-transformers-and-the-brain-with-adrian-kosowski",[81],[84,2346],{"url":2343},{"title":23,"searchDepth":24,"depth":24,"links":2348},[],{"redirection":35,"thumbnail":2350,"tags":2352,"date":2353,"related":34},{"src":2351,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fsds-th.png",[94,1901,1508,1574],"2025-10-07","\u002Fnews\u002Fsds-929-dragon-hatchling-the-missing-link-between-transformers-and-the-brain-with-adrian-kosowski",{"title":2330,"description":23},{"loc":2354},"9.news\u002Fsds-929-dragon-hatchling-the-missing-link-between-transformers-and-the-brain-with-adrian-kosowski","oKO9WhSiXreqFhQqDdrmZ04yM-jsRQuHRvkCekRItzI",{"id":2360,"title":2361,"author":2362,"body":2365,"description":2379,"extension":27,"meta":2380,"navigation":35,"path":2385,"seo":2386,"sitemap":2387,"stem":2388,"__hash__":2389},"content\u002F9.news\u002Fsecond-most-popular-ai-paper-of-the-year-in-2025.md","Second most popular AI paper of the year in 2025",{"name":2363,"favicon":2364,"website":2364},"Hugging Face","huggingface.co",{"type":13,"value":2366,"toc":2377},[2367,2374],[73,2368,2369,2373],{},[77,2370,1551],{"href":2371,"rel":2372},"https:\u002F\u002Fhuggingface.co\u002Fpapers\u002F2509.26507",[81]," ranked 2 in the Top 10 most upvoted papers on HuggingFace!",[1331,2375],{"src":2376},"https:\u002F\u002Fx.com\u002FHuggingPapers\u002Fstatus\u002F2005312316829516222",{"title":23,"searchDepth":24,"depth":24,"links":2378},[],"The Dragon Hatchling: The Missing Link between the Transformer and Models of the Brain ranked 2 in the Top 10 most upvoted 
papers on HuggingFace!",{"layout":90,"tags":2381,"date":2382,"thumbnail":2383},[94,1508,1574],"2025-12-28",{"src":2384,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fsecond-most-popular-ai-paper-of-the-year-in-2025-th.jpg","\u002Fnews\u002Fsecond-most-popular-ai-paper-of-the-year-in-2025",{"title":2361,"description":2379},{"loc":2385},"9.news\u002Fsecond-most-popular-ai-paper-of-the-year-in-2025","O76w_xzQUU73uCqwdwHuUpPSvqe6Cge7qu4Jg_HIcis",{"id":2391,"title":2392,"author":2393,"body":2394,"description":2408,"extension":27,"meta":2409,"navigation":35,"path":2414,"seo":2415,"sitemap":2416,"stem":2417,"__hash__":2418},"content\u002F9.news\u002Ftech-predictions-2026.md","Tech That Will Change Your Life in 2026",{"name":1521,"img":1522,"provider":11,"website":1523},{"type":13,"value":2395,"toc":2406},[2396,2398,2404],[68,2397,71],{"id":70},[73,2399,75,2400,82],{},[77,2401,2402],{"href":2402,"rel":2403},"https:\u002F\u002Fwww.wsj.com\u002Ftech\u002Fai\u002Ftech-predictions-2026-6884d6b0",[81],[84,2405],{"url":2402},{"title":23,"searchDepth":24,"depth":24,"links":2407},[],"Folding iPhones, home robots, mind-reading tech and EV supercars are all heading your way—along with AI-induced challenges in healthcare and cybersecurity",{"redirection":35,"thumbnail":2410,"tags":2412,"date":2413,"related":34},{"src":2411},"https:\u002F\u002Fimages.wsj.net\u002Fim-07951141",[94],"2025-12-26","\u002Fnews\u002Ftech-predictions-2026",{"title":2392,"description":2408},{"loc":2414},"9.news\u002Ftech-predictions-2026","zcrriogVSZqpygyVsvzRKJYvh7Q9-q11_c_9K0xUawQ",{"id":2420,"title":2421,"author":2422,"body":2425,"description":2439,"extension":27,"meta":2440,"navigation":35,"path":2445,"seo":2446,"sitemap":2447,"stem":2448,"__hash__":2449},"content\u002F9.news\u002Fthat-hint-where-ai-is-heading.md","That Hint Where AI Is Heading",{"name":2423,"favicon":2424,"website":2424},"Turing 
Post","turingpost.com",{"type":13,"value":2426,"toc":2437},[2427,2429,2435],[68,2428,71],{"id":70},[73,2430,75,2431,82],{},[77,2432,2433],{"href":2433,"rel":2434},"https:\u002F\u002Fwww.turingpost.com\u002Fp\u002Ffod133",[81],[84,2436],{"url":2433},{"title":23,"searchDepth":24,"depth":24,"links":2438},[],"Research Papers That Hint Where AI Is Heading with commentary from their authors + foundational books to read this holiday season",{"redirection":35,"thumbnail":2441,"tags":2443,"date":2444,"related":34},{"src":2442},"https:\u002F\u002Fbeehiiv-images-production.s3.amazonaws.com\u002Fuploads\u002Fasset\u002Ffile\u002F644e5fdd-ea96-4dcf-b286-6783c793e66b\u002FFrame_328.png?t=1767048275",[94,1508],"2025-12-29","\u002Fnews\u002Fthat-hint-where-ai-is-heading",{"title":2421,"description":2439},{"loc":2445},"9.news\u002Fthat-hint-where-ai-is-heading","30G9CIiNUUUHYjfZAQjHHGvjaHQIKc_udisd7jiyvV0",{"id":2451,"title":1551,"author":2452,"body":2453,"description":2466,"extension":27,"meta":2467,"navigation":35,"path":2473,"seo":2474,"sitemap":2475,"stem":2476,"__hash__":2477},"content\u002F9.news\u002Fthe-dragon-hatchling-the-missing-link-between-the-transformer-and-models-of-the-brain.md",{"name":2363,"favicon":2364,"website":2364},{"type":13,"value":2454,"toc":2464},[2455,2457,2462],[68,2456,71],{"id":70},[73,2458,75,2459,82],{},[77,2460,2371],{"href":2371,"rel":2461},[81],[84,2463],{"url":2371},{"title":23,"searchDepth":24,"depth":24,"links":2465},[],"BDH, a biologically inspired Large Language Model, combines scale-free network architecture with Hebbian learning to achieve Transformer-like performance while maintaining 
interpretability.",{"redirection":35,"thumbnail":2468,"tags":2470,"date":2472,"related":34},{"src":2469,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fhugging-face-th.png",[94,1508,2471],"developer","2025-09-30","\u002Fnews\u002Fthe-dragon-hatchling-the-missing-link-between-the-transformer-and-models-of-the-brain",{"title":1551,"description":2466},{"loc":2473},"9.news\u002Fthe-dragon-hatchling-the-missing-link-between-the-transformer-and-models-of-the-brain","Flg7qhuKdidFidYKH_gkSUYbCxLzU7Tw4XW-eSSh_aw",{"id":2479,"title":2480,"author":2481,"body":2482,"description":2496,"extension":27,"meta":2497,"navigation":35,"path":2502,"seo":2503,"sitemap":2504,"stem":2505,"__hash__":2506},"content\u002F9.news\u002Fthe-post-transformer-era-ais-next-frontier-nyu-x-pathway.md","The Post-Transformer Era: AI's Next Frontier | NYU x Pathway",{"id":7,"url":8,"name":9,"img":10,"provider":11},{"type":13,"value":2483,"toc":2494},[2484,2486,2492],[68,2485,71],{"id":70},[73,2487,75,2488,82],{},[77,2489,2490],{"href":2490,"rel":2491},"https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=o9o7fU_ZSIE",[81],[84,2493],{"url":2490},{"title":23,"searchDepth":24,"depth":24,"links":2495},[],"What comes after Transformers? The Transformer architecture behind GPT has dominated AI for nearly a decade. But cracks are showing. 
Transformer-based models have no continuous learning (frozen in time, like Groundhog Day), limited context windows, and compute costs that spiral as reasoning gets longer",{"redirection":35,"thumbnail":2498,"tags":2500,"date":2501,"related":34},{"src":2499,"contain":35},"https:\u002F\u002Fimg.youtube.com\u002Fvi\u002Fo9o7fU_ZSIE\u002Fmaxresdefault.jpg",[1901,237,1508,1574],"2026-02-06","\u002Fnews\u002Fthe-post-transformer-era-ais-next-frontier-nyu-x-pathway",{"title":2480,"description":2496},{"loc":2502},"9.news\u002Fthe-post-transformer-era-ais-next-frontier-nyu-x-pathway","mN733gsUUhmvogT0mUjyz3YYryuttQH9FMXUgoLt4R8",{"id":2508,"title":2509,"author":2510,"body":2513,"description":23,"extension":27,"meta":2527,"navigation":35,"path":2532,"seo":2533,"sitemap":2534,"stem":2535,"__hash__":2536},"content\u002F9.news\u002Fthis-ai-grows-a-brain-during-training.md","This AI Grows a Brain During Training (Pathway's AI w\u002F Zuzanna Stamirowska)",{"name":2511,"img":2512,"provider":11},"The Neuron","\u002Fassets\u002Fblog\u002Favatars\u002Fthe-neuron-av.jpg",{"type":13,"value":2514,"toc":2525},[2515,2517,2523],[68,2516,71],{"id":70},[73,2518,75,2519,82],{},[77,2520,2521],{"href":2521,"rel":2522},"https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=duw7RUif8hE",[81],[84,2524],{"url":2521},{"title":23,"searchDepth":24,"depth":24,"links":2526},[],{"redirection":35,"thumbnail":2528,"tags":2530,"date":2531,"related":34},{"src":2529,"contain":35,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fthis-ai-grows-a-brain-during-training-th.jpg",[94,1901,1508],"2026-01-06","\u002Fnews\u002Fthis-ai-grows-a-brain-during-training",{"title":2509,"description":23},{"loc":2532},"9.news\u002Fthis-ai-grows-a-brain-during-training","iINZWFKKkoI_5EHwc3ne2p1qKZgsI1bJNVcQqsJjB5Q",{"id":2538,"title":2539,"author":2540,"body":2543,"description":2539,"extension":27,"meta":2557,"navigation":35,"path":2563,"seo":2564,"sitemap":2565,"stem":2566,"__hash__":2567},"content\u002F9.news\u002Ftransdev-
and-pathway-partner.md","Transdev and Pathway partner to improve mobility and public transport performance through LiveAI™",{"name":2541,"img":2542},"transdev","https:\u002F\u002Fmedia.glassdoor.com\u002Fsql\u002F413452\u002Ftransdev-squareLogo-1702746543089.png",{"type":13,"value":2544,"toc":2555},[2545,2547,2553],[68,2546,71],{"id":70},[73,2548,75,2549,82],{},[77,2550,2551],{"href":2551,"rel":2552},"https:\u002F\u002Fwww.transdev.com\u002Fen\u002Fpress-release\u002Ftransdev-and-pathway-partner\u002F",[81],[84,2554],{"url":2551},{"title":23,"searchDepth":24,"depth":24,"links":2556},[],{"redirection":35,"layout":2558,"aside":34,"single":35,"date":2559,"thumbnail":2560,"tags":2562,"hidden":35},"default","2025-04-23",{"src":2561,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Ftransdev-and-pathway-partner-th.png",[94,632],"\u002Fnews\u002Ftransdev-and-pathway-partner",{"title":2539,"description":2539},{"loc":2563},"9.news\u002Ftransdev-and-pathway-partner","iSWhreDrP5ZyMx5utQQycKkAegfOyjGU1vCtTo70000",{"id":2569,"title":2539,"author":2570,"body":2571,"description":2660,"extension":27,"meta":2661,"navigation":35,"path":2665,"seo":2666,"sitemap":2667,"stem":2668,"__hash__":2669},"content\u002F9.news\u002Ftransdev-pathway-live-ai-public-transport-mobility.md",{"id":7,"url":8,"name":9,"img":10,"provider":11},{"type":13,"value":2572,"toc":2655},[2573,2588,2591,2594,2598,2601,2604,2612,2620,2627,2633,2636,2640,2648,2652],[73,2574,2575,2578,2579,2581,2582,2587],{},[169,2576,2577],{},"Issy-les-Moulineaux (France), April 17, 2025"," - ",[77,2580,712],{"href":711},", the data company that builds LiveAI™, and ",[77,2583,2586],{"href":2584,"rel":2585},"https:\u002F\u002Fwww.transdev.com",[81],"Transdev",", a global daily mobility solutions provider, today announce a strategic partnership to transform public transport network operations by integrating innovative real-time AI frameworks at their heart.",[73,2589,2590],{},"With its operations and systems constantly 
generating more and more data, Transdev needed a solution capable of processing real-time information, combined with historical data, to address mobility challenges in various territories.",[73,2592,2593],{},"Pathway, which enables continuous learning via an efficient and scalable data engine to create LiveAI™ systems that think and learn in real time, allows Transdev to leverage AI systems powered by live data pipelines. This delivers insights and supports decision-making based on always up-to-date knowledge, which is crucial for the transportation sector.",[140,2595,2597],{"id":2596},"the-relevance-of-ai-transforming-simple-geolocation-data-into-reliable-real-time-passenger-information","The Relevance of AI: Transforming Simple Geolocation Data into Reliable Real-Time Passenger Information",[73,2599,2600],{},"For public transport applications, Pathway's LiveAI™ technology is specifically applied to geospatial and temporal data. For example, it enables live analytics of how vehicles are moving through a city in real time. Other data regarding the operation or functioning of vehicles can also be analyzed through AI.",[73,2602,2603],{},"The benefits of applying LiveAI™ to transport operations include:",[145,2605,2606,2609],{},[148,2607,2608],{},"Increased operational efficiency: More accurate and reliable predictions of arrival times, enhanced dynamic management of planned and unplanned diversions and disruptions, and new high-performance tools for operators.",[148,2610,2611],{},"Improved customer experience: Providing reliable, real-time passenger information, both in normal and disrupted situations to optimize user experience. 
The automated management of new arrival times when routes are diverted also reduces passenger waiting times and inconvenience.",[2613,2614,2617],"quote",{"name":2615,"title":2616},"Edouard Hénaut","CEO France at Transdev",[73,2618,2619],{},"Pathway shows that real-time data processing and AI integrate seamlessly into our chain of business tools. The solution complements our operating and passenger information systems, without adding complexity or implementation delays, while guaranteeing reliable results. Effectively ensuring daily mobility requires expert use of the real-time data we produce, transform, and deliver to both passengers and local authorities clients. Our partnership with Pathway strengthens our expertise in this area and enables significant gains for the benefit of service quality.",[2613,2621,2624],{"name":2622,"title":2623},"Laurent Mahieu","Director of the Hauts-de-France and Grand-Est Regions at Transdev, President of DK’BUS",[73,2625,2626],{},"The experimentation on the DK’BUS network, which serves the Urban Community of Dunkirk, has shown significant gains in terms of information quality. The accuracy and reliability of the information delivered has improved, enabling the disappearance of theoretical arrival times, better prediction of waiting time estimations and efficient dynamic management of unscheduled deviations. We look forward to deploying and offering this quality of information to our passengers daily. The DK’Bus team, led by General Manager Nicolas Gaillard, already has great ideas to introduce it to our travelers! Pathway is undeniably the high-performance solution for monitoring and visualizing our activity to become even more reactive and continuously improve the data we produce.",[2613,2628,2630],{"name":314,"title":2629},"CEO and co-founder of Pathway",[73,2631,2632],{},"Transportation is a dynamic industry, and an understanding of the real-time situation is critical for navigating mobility challenges. 
Applying AI and the most modern data processing to these challenges makes a very tangible difference to communities. We are proud to provide AI that reduces wait times and unpredictability, in turn attracting more people to public transport and boosting quality of life for residents of cities around the world.",[73,2634,2635],{},"Pathway's technology is proven with over 46,000 installations and users and is supported through the ZEBOX ecosystem that gathers companies like CMA CGM, Transdev or VINCI.  Pathway now also delivers for major clients such as NATO and La Poste. The company recently raised $10 million in seed funding and has a growing community of developers based in over 100 countries.",[140,2637,2639],{"id":2638},"about-transdev","About Transdev",[73,2641,2642,2643],{},"Operator and leading independent private mobility group, Transdev empowers freedom to move every day thanks to safe, reliable and innovative solutions that serve the common good. Present in 19 countries, Transdev transports an average of 12.8 million passengers daily, operating all transportation modes and resolutely committed to the ecological transition. The Group employs more than 105,000 women and men serving its passengers, consolidating its position as the world leader in public transportation. Transdev advises and supports local authorities and companies in a long-term partnership. Transdev is jointly owned by Caisse des Dépôts (66%) and the Rethmann Group (34%). In 2024, Transdev reported sales of €10.05 billion. 
For more information: ",[77,2644,2647],{"href":2645,"rel":2646},"http:\u002F\u002Fwww.transdev.com",[81],"www.transdev.com",[140,2649,2651],{"id":2650},"about-pathway","About Pathway",[2653,2654],"pathway-about",{},{"title":23,"searchDepth":24,"depth":24,"links":2656},[2657,2658,2659],{"id":2596,"depth":24,"text":2597},{"id":2638,"depth":24,"text":2639},{"id":2650,"depth":24,"text":2651},"Transdev and Pathway announce a strategic partnership to revolutionize public transport using LiveAI™, enhancing real-time passenger information and operational efficiency across mobility networks",{"layout":90,"thumbnail":2662,"tags":2664,"date":1748,"hidden":35},{"src":2663,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Ftransdev-pathway-live-ai-public-transport-mobility-th.png",[94],"\u002Fnews\u002Ftransdev-pathway-live-ai-public-transport-mobility",{"title":2539,"description":2660},{"loc":2665},"9.news\u002Ftransdev-pathway-live-ai-public-transport-mobility","-2bRmDWuA_uvKMq9WgwncU77QQFzmUcluM7MjqQjhSw",{"id":2671,"title":2672,"author":2673,"body":2676,"description":2690,"extension":27,"meta":2691,"navigation":35,"path":2696,"seo":2697,"sitemap":2698,"stem":2699,"__hash__":2700},"content\u002F9.news\u002Fvictor-szczerba-assumes-cco-role-at-pathway-post-funding.md","Victor Szczerba assumes CCO role at Pathway post funding",{"name":2674,"img":2675,"provider":11},"ITBrief","\u002Fassets\u002Fblog\u002Favatars\u002Fitbrief-av.png",{"type":13,"value":2677,"toc":2688},[2678,2680,2686],[68,2679,71],{"id":70},[73,2681,75,2682,82],{},[77,2683,2684],{"href":2684,"rel":2685},"https:\u002F\u002Fitbrief.news\u002Fstory\u002Fvictor-szczerba-assumes-cco-role-at-pathway-post-funding",[81],[84,2687],{"url":2684},{"title":23,"searchDepth":24,"depth":24,"links":2689},[],"Victor Szczerba has been appointed Chief Commercial Officer at Pathway, following its USD $10 million funding, to enhance its LiveAI™ technology and 
strategies",{"redirection":35,"layout":2558,"date":2692,"thumbnail":2693,"tags":2695,"hidden":35},"2025-04-01",{"src":2694,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fvictor-szczerba-assumes-cco-role-at-pathway-post-funding-th.png",[94],"\u002Fnews\u002Fvictor-szczerba-assumes-cco-role-at-pathway-post-funding",{"title":2672,"description":2690},{"loc":2696},"9.news\u002Fvictor-szczerba-assumes-cco-role-at-pathway-post-funding","hueHB0Zx7yKl2eyqycaLbit82Ii2T_Ioe1fcjPCmw84",{"id":2702,"title":2703,"author":2704,"body":2707,"description":2721,"extension":27,"meta":2722,"navigation":35,"path":2727,"seo":2728,"sitemap":2729,"stem":2730,"__hash__":2731},"content\u002F9.news\u002Fwhat-sudoku-reveals-about-the-limits-of-llms.md","What Sudoku reveals about the limits of LLMs",{"name":2705,"favicon":2706,"website":2706},"techradar","www.techradar.com",{"type":13,"value":2708,"toc":2719},[2709,2711,2717],[68,2710,71],{"id":70},[73,2712,75,2713,82],{},[77,2714,2715],{"href":2715,"rel":2716},"https:\u002F\u002Fwww.techradar.com\u002Fpro\u002Fwhat-sudoku-reveals-about-the-limits-of-llms",[81],[84,2718],{"url":2715},{"title":23,"searchDepth":24,"depth":24,"links":2720},[],"LLM failure to solve reasoning puzzles exposes deep architectural limits",{"redirection":35,"thumbnail":2723,"tags":2725,"date":2726,"related":34},{"src":2724,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fh8ZQHernNUVpnGYX7QnxVM-650-80.jpg.webp",[94,1508],"2026-05-26","\u002Fnews\u002Fwhat-sudoku-reveals-about-the-limits-of-llms",{"title":2703,"description":2721},{"loc":2727},"9.news\u002Fwhat-sudoku-reveals-about-the-limits-of-llms","hcmLB5ayzZGQmxCYjSz_4xrItcs9tfc_K_LF0ekLXaI",{"id":2733,"title":2734,"author":2735,"body":2736,"description":2750,"extension":27,"meta":2751,"navigation":35,"path":2756,"seo":2757,"sitemap":2758,"stem":2759,"__hash__":2760},"content\u002F9.news\u002Fwhat-the-transformer-vs-post-transformer-debate-revealed-about-ais-next-architecture.md","What the 
Transformer vs. Post-Transformer debate revealed about AI's next architecture",{"name":2511,"img":2512,"provider":11},{"type":13,"value":2737,"toc":2748},[2738,2740,2746],[68,2739,71],{"id":70},[73,2741,75,2742,82],{},[77,2743,2744],{"href":2744,"rel":2745},"https:\u002F\u002Fwww.theneuron.ai\u002Fexplainer-articles\u002Fwhat-the-transformer-vs-post-transformer-debate-revealed-about-ais-next-architecture\u002F",[81],[84,2747],{"url":2744},{"title":23,"searchDepth":24,"depth":24,"links":2749},[],"Pathway put Lukasz Kaiser, Llion Jones, Mathias Lechner, and Adrian Kosowski in a boxing ring to debate whether Transformers still deserve the belt. The answer was less about one winner and more about the tests that will decide AI's next architecture.",{"redirection":35,"thumbnail":2752,"tags":2754,"date":2755,"related":34},{"src":2753,"contain":35},"https:\u002F\u002Fimg.youtube.com\u002Fvi\u002FhCjoMLuCuLQ\u002Fmaxresdefault.jpg",[1901,237,1508,1574],"2026-05-19","\u002Fnews\u002Fwhat-the-transformer-vs-post-transformer-debate-revealed-about-ais-next-architecture",{"title":2734,"description":2750},{"loc":2756},"9.news\u002Fwhat-the-transformer-vs-post-transformer-debate-revealed-about-ais-next-architecture","2IS9pynX9Z_FMvhQYhLI-OwmAeNsqS4OG5mM7SfDsKo",{"id":2762,"title":2763,"author":2764,"body":2767,"description":2781,"extension":27,"meta":2782,"navigation":35,"path":2787,"seo":2788,"sitemap":2789,"stem":2790,"__hash__":2791},"content\u002F9.news\u002Fwhy-continual-learning-and-memory-matters-more-than-data-in-the-next-generation-of-ai.md","Why continual learning and memory matters more than data in the next generation of AI",{"name":2765,"favicon":2766,"website":2766},"Express 
Computer","expresscomputer.in",{"type":13,"value":2768,"toc":2779},[2769,2771,2777],[68,2770,71],{"id":70},[73,2772,75,2773,82],{},[77,2774,2775],{"href":2775,"rel":2776},"https:\u002F\u002Fwww.expresscomputer.in\u002Fguest-blogs\u002Fwhy-continual-learning-and-memory-matters-more-than-data-in-the-next-generation-of-ai\u002F135038\u002F",[81],[84,2778],{"url":2775},{"title":23,"searchDepth":24,"depth":24,"links":2780},[],"The next leap in AI will not come from large models that wake up with amnesia. It will come from architectures that can learn through experience.",{"redirection":35,"thumbnail":2783,"tags":2785,"date":2786,"related":34},{"src":2784,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fzuzanna-stamirowska-co-founder-and-ceo-of-pathway-interview-series-th.png",[1508],"2026-05-14","\u002Fnews\u002Fwhy-continual-learning-and-memory-matters-more-than-data-in-the-next-generation-of-ai",{"title":2763,"description":2781},{"loc":2787},"9.news\u002Fwhy-continual-learning-and-memory-matters-more-than-data-in-the-next-generation-of-ai","oxJ-R9lKt3iL601F1Fr0bwJQYNJ3jnvJv1f-aZ8Qe6Y",{"id":2793,"title":2794,"author":2795,"body":2798,"description":2812,"extension":27,"meta":2813,"navigation":35,"path":2818,"seo":2819,"sitemap":2820,"stem":2821,"__hash__":2822},"content\u002F9.news\u002Fwhy-the-future-of-ai-will-go-beyond-transformers.md","Why the Future of AI Will Go Beyond Transformers",{"name":2796,"favicon":2797,"website":2797},"Analytics India Magazine","analyticsindiamag.com",{"type":13,"value":2799,"toc":2810},[2800,2802,2808],[68,2801,71],{"id":70},[73,2803,75,2804,82],{},[77,2805,2806],{"href":2806,"rel":2807},"https:\u002F\u002Fanalyticsindiamag.com\u002Fai-features\u002Fwhy-the-future-of-ai-will-go-beyond-transformers",[81],[84,2809],{"url":2806},{"title":23,"searchDepth":24,"depth":24,"links":2811},[],"AI models have scaled significantly, but their transformative impact has diminished over time. 
Enterprises face challenges with AI due to a high incidence of negative outcomes linked to inaccurate outputs...",{"redirection":35,"thumbnail":2814,"tags":2816,"date":2817,"related":34},{"src":2815},"https:\u002F\u002Fzdpdvwhvukelzzbzbjvh.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fimported-images\u002F1769104092206-bc45f05c-6f89-43dc-bfa6-39b431842c69-bu1zp.webp?width=1200&quality=60&format=avif",[1508],"2026-04-23","\u002Fnews\u002Fwhy-the-future-of-ai-will-go-beyond-transformers",{"title":2794,"description":2812},{"loc":2818},"9.news\u002Fwhy-the-future-of-ai-will-go-beyond-transformers","p6Iix9ua03eB3AocxFuQFy_5fZEugQ4yK8xmZTrgK0U",{"id":2824,"title":2825,"author":2826,"body":2829,"description":2843,"extension":27,"meta":2844,"navigation":35,"path":2849,"seo":2850,"sitemap":2851,"stem":2852,"__hash__":2853},"content\u002F9.news\u002Fwhy-todays-ai-struggles-with-the-real-world-and-what-comes-next.md","Why today’s AI struggles with the real world, and what comes next",{"name":2827,"favicon":2828,"website":2828},"ET Edge Insights","etedge-insights.com",{"type":13,"value":2830,"toc":2841},[2831,2833,2839],[68,2832,71],{"id":70},[73,2834,75,2835,82],{},[77,2836,2837],{"href":2837,"rel":2838},"https:\u002F\u002Fetedge-insights.com\u002Ftechnology\u002Fartificial-intelligence\u002Fwhy-todays-ai-struggles-with-the-real-world-and-what-comes-next\u002F",[81],[84,2840],{"url":2837},{"title":23,"searchDepth":24,"depth":24,"links":2842},[],"Why AI's biggest challenge is memory—and how post-transformer architectures could enable continuous 
learning",{"redirection":35,"thumbnail":2845,"tags":2847,"date":2848,"related":34},{"src":2846},"https:\u002F\u002Fetedge-insights.com\u002Fwp-content\u002Fuploads\u002F2025\u002F12\u002FAI-Quantum.jpg",[94,1508],"2026-03-12","\u002Fnews\u002Fwhy-todays-ai-struggles-with-the-real-world-and-what-comes-next",{"title":2825,"description":2843},{"loc":2849},"9.news\u002Fwhy-todays-ai-struggles-with-the-real-world-and-what-comes-next","i1OIYyBg13TYXC4qX5A8Y1fpFdrsyAjtlY8tEwLcbj0",{"id":2855,"title":2856,"author":2857,"body":2860,"description":23,"extension":27,"meta":2874,"navigation":35,"path":2877,"seo":2878,"sitemap":2879,"stem":2880,"__hash__":2881},"content\u002F9.news\u002Fzuzanna-stamirowska-co-founder-and-ceo-of-pathway-interview-series.md","Zuzanna Stamirowska, Co-Founder and CEO of Pathway – Interview Series",{"name":2858,"favicon":2859,"website":2859},"Unite AI","unite.ai",{"type":13,"value":2861,"toc":2872},[2862,2864,2870],[68,2863,71],{"id":70},[73,2865,75,2866,82],{},[77,2867,2868],{"href":2868,"rel":2869},"https:\u002F\u002Fwww.unite.ai\u002Fzuzanna-stamirowska-co-founder-and-ceo-of-pathway-interview-series\u002F",[81],[84,2871],{"url":2868},{"title":23,"searchDepth":24,"depth":24,"links":2873},[],{"layout":90,"redirection":35,"tags":2875,"date":2079,"thumbnail":2876},[94],{"src":2784,"provider":11},"\u002Fnews\u002Fzuzanna-stamirowska-co-founder-and-ceo-of-pathway-interview-series",{"title":2856,"description":23},{"loc":2877},"9.news\u002Fzuzanna-stamirowska-co-founder-and-ceo-of-pathway-interview-series","__oquUzUBg3yWehTG8IOEd0HmtaYY207jjOBr4uJnvA",{"id":2883,"title":2884,"author":51,"body":2885,"description":2921,"extension":27,"meta":2922,"navigation":35,"path":2923,"seo":2924,"sitemap":2925,"stem":2926,"__hash__":2927},"content\u002Fframework\u002Fblog\u002F1.index.md","Blog",{"type":13,"value":2886},[2887,2891,2906,2918],[68,2888,2890],{"id":2889},"in-the-news","In the news",[73,2892,2893,2894,2899,2900,2905],{},"Read about 
",[1291,2895,2898],{"className":2896},[2897],"text-primary-500","Pathway’s"," latest ",[1291,2901,2904],{"className":2902},[2903],"text-secondary-500","media mentions, press releases"," and more!",[2907,2908,2915],"modal",{"className":2909,"name":2914},[800,801,133,2910,2911,2912,2913],"my-4","mb-10","block","text-base","Newsletter",[73,2916,2917],{},"Subscribe to our newsletter!",[2919,2920],"articles",{},"Read about Pathway’s latest media mentions, press releases and more!",{"layout":2558,"aside":34,"toc":34,"single":35},"\u002Fframework\u002Fblog",{"title":2884,"description":2921},{"loc":2923},"framework\u002Fblog\u002F1.index","shdvdCYJ6w8bdpejuMQbMXk7dIJ7X57V5A2-R5_Ixek",{"id":2929,"title":2930,"author":2931,"body":2932,"description":3079,"extension":27,"meta":3080,"navigation":35,"path":3086,"seo":3087,"sitemap":3088,"stem":3089,"__hash__":3090},"content\u002Fframework\u002Fblog\u002F1.pathway-open-beta-announced.md","Pathway Live Data Framework is now in Open Beta",{"id":312,"url":313,"name":314,"description":315,"img":316,"provider":11,"linkedin":317},{"type":13,"value":2933,"toc":3075},[2934,2938,2957,2961,2964,2967,2981,2986,2989,2992,3003,3006,3013,3017,3020,3034,3039,3046,3053,3065],[68,2935,2937],{"id":2936},"pathway-live-data-framework-is-now-available-to-all-developers","Pathway Live Data Framework is now available to all developers!",[73,2939,2940,2941,2947,2948,2951],{},"The Pathway Live Data Framework - the stream processing framework which takes care of data updates for you - announces a ",[77,2942,2943],{"href":1438},[2944,2945,2946],"b",{},"$4.5M funding round",", and opens to all developers. 
You can try it out in a cloud notebook directly from your browser, or run it on a local Linux machine.",[2949,2950],"br",{},[77,2952,2956],{"className":2953,"bold":23,"size":2955,"href":692},[2954,800,801],"mb-0!","large","Run it now",[140,2958,2960],{"id":2959},"why-should-you-use-the-pathway-live-data-framework","Why should you use the Pathway Live Data Framework?",[73,2962,2963],{},"The Pathway Live Data Framework is a programming framework which allows you to work with streaming data as if you were working with static data, in batch mode.",[73,2965,2966],{},"Have you ever tried to make sense of streaming data? If so, there is a high chance that you encountered at least one of these issues:",[145,2968,2969,2972,2975,2978],{},[148,2970,2971],{},"There are these annoying data updates that need to be taken care of",[148,2973,2974],{},"One needs to use the same logic to handle real-time and historical data",[148,2976,2977],{},"Debugging is a nightmare, because how can you debug something against unknown data?",[148,2979,2980],{},"Not to mention applying proper Machine Learning on top of streaming data to draw business insights from it. Business insights, which are necessary for key decision-making.",[73,2982,2983],{},[597,2984],{"alt":23,"src":2985},"\u002Fassets\u002Fcontent\u002Fblog\u002Fdifficulties-streaming-data.svg",[73,2987,2988],{},"If these are problems you have been up against, you are in the right place.\nAt Pathway, we design the Pathway Live Data Framework which quietly takes care of data updates for you.",[73,2990,2991],{},"It gives you:",[145,2993,2994,2997,3000],{},[148,2995,2996],{},"A native real-time approach. 
Every task is either real-time streaming or streaming with historical data (backfilling), no need for batch, no hacks required.",[148,2998,2999],{},"Reactivity.",[148,3001,3002],{},"Full power of Python (to make all your ML dreams come true) with an extra SQL syntax layer coming soon (to make sure all data engineers are happy with their Pathway Live Data Framework pipelines, too).",[73,3004,3005],{},"In the design of the framework's streaming engine, we opted for ease-of-use and scalability.",[2613,3007,3010],{"name":3008,"title":3009},"Lukasz Kaiser","Co-author of Tensor Flow and co-inventor of Transformers, now at OpenAI - and an angel investor in Pathway.",[73,3011,3012],{},"In Machine Learning, the key to success of a programming framework is how to combine usability with scalability. This was the axis of competition between Google's TensorFlow and Facebook's PyTorch during the deep learning revolution. Today, Pathway has taken into account the lessons learned during this battle of giants, and embedded them in the compiler of its real-time data processing framework.",[140,3014,3016],{"id":3015},"what-does-all-this-mean-in-practice-for-a-developer","What does all this mean in practice, for a developer?",[73,3018,3019],{},"That you can write as if you were writing a batch data processing pipeline (well, it's actually a little more than that, as we support loops and iteration!) and have it run on streaming data.",[73,3021,3022,3023,3027,3028,3033],{},"For a start, check out this simple ",[77,3024,3026],{"href":3025},"\u002Fdevelopers\u002Ftemplates\u002Fetl\u002Flsh_chapter1","example of classification of handwritten digits",". All of it is captured by the code below. We approach this task with a classifier from the framework's standard library, in this case, k-Nearest-Neighbors (",[77,3029,3032],{"href":3030,"rel":3031},"https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FK-nearest_neighbors_algorithm",[81],"read more on Wikipedia","). 
The framework builds up the corresponding control flow graph, and updates it in streaming mode.",[73,3035,3036],{},[597,3037],{"alt":23,"src":3038},"\u002Fassets\u002Fcontent\u002Fblog\u002Fclassification-control-flow-pathway.svg",[73,3040,3041,3042,694],{},"Using Pathway Live Data Framework means that all Machine Learning outcomes are updated as the models learn with new samples and improve over time. Classification decisions for tested elements will also be revisited whenever they change. Such an approach is called reactive processing of streaming data. If you would like to learn more about this topic, we explain it in detail in ",[77,3043,3045],{"href":3044},"\u002Fblog\u002Fpydata","this fresh video talk",[73,3047,3048,3049,694],{},"You will also find many more examples in our Documentation - and we are also sharing with you the whole examples pack at ",[77,3050,3051],{"href":3051,"rel":3052},"https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fpathway-examples",[81],[73,3054,3055,3056,3059,3060,3064],{},"All of this is now open for you to play with it, test, and have fun. 
You can even ",[77,3057,3058],{"href":692},"run it"," in a cloud notebook from your browser, unless you prefer to ",[3061,3062,3063],"code",{},"pip install"," directly on to your own Linux machine.",[73,3066,3067,3068],{},"If you have some feedback on Pathway Live Data Framework, or just some streaming use cases that are leaving you with sleepless nights, we would love to know.\n",[169,3069,3070,3074],{},[77,3071,3073],{"href":3072},"https:\u002F\u002Fdiscord.com\u002Finvite\u002Fpathway","Join us on Discord"," or drop us a line!",{"title":23,"searchDepth":24,"depth":24,"links":3076},[3077,3078],{"id":2959,"depth":24,"text":2960},{"id":3015,"depth":24,"text":3016},"Pathway Live Data Framework - the streaming programming framework which takes care of data updates for you - is now available to all developers.",{"layout":90,"thumbnail":3081,"tags":3083,"date":3085,"hidden":35},{"src":3082,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fpathway-thumbnail.gif",[90,3084],"open beta","2022-12-05","\u002Fframework\u002Fblog\u002Fpathway-open-beta-announced",{"title":2930,"description":3079},{"loc":3086},"framework\u002Fblog\u002F1.pathway-open-beta-announced","_sKlP-AVnGXkM0NhwA6uKL7TIGmLisNzlThXD4sXKC0",{"id":3092,"title":3093,"author":3094,"body":3095,"description":5041,"extension":27,"meta":5042,"navigation":35,"path":5061,"seo":5062,"sitemap":5063,"stem":5064,"__hash__":5065},"content\u002Fframework\u002Fblog\u002F1001.gemini-rag.md","Multimodal RAG with 
Gemini",{"id":7,"url":8,"name":9,"img":10,"provider":11},{"type":13,"value":3096,"toc":5022},[3097,3102,3106,3117,3131,3148,3154,3158,3184,3188,3193,3199,3205,3210,3224,3228,3231,3245,3249,3260,3263,3269,3273,3287,3290,3304,3308,3314,3317,3320,3324,3331,3337,3340,3343,3347,3351,3354,3357,3361,3393,3396,3400,3406,3410,3417,3459,3463,3466,3487,3510,3714,3718,3721,3919,3923,3930,3937,3953,4047,4051,4054,4154,4158,4162,4165,4259,4263,4278,4351,4355,4358,4544,4548,4551,4628,4632,4635,4710,4714,4717,4782,4872,4942,4994,5002,5005,5009,5012,5018],[3098,3099],"true-img",{"alt":3100,"src":3101},"blog banner","\u002Fassets\u002Fcontent\u002Fshowcases\u002Fgemini_rag\u002FBlog_Banner.png",[68,3103,3105],{"id":3104},"multimodal-rag-with-pathway-and-gemini","Multimodal RAG with Pathway and Gemini",[73,3107,3108,3109,3112,3113,3116],{},"The recent release of ",[169,3110,3111],{},"Google Gemini 1.5",", with its impressive ",[169,3114,3115],{},"1 million token context length window",", has sparked discussions about the future of RAG. However, it hasn't rendered it obsolete. This system still offers unique advantages, especially in curating and optimizing the context provided to the model, ensuring relevance and accuracy. What is particularly interesting is how these advancements can be harnessed to enhance our projects and streamline our workflows.",[73,3118,3119,3120,3123,3124,3126,3127,3130],{},"In this article, you'll learn how to set up a ",[169,3121,3122],{},"Multimodal Retrieval-Augmented Generation (MM-RAG)"," system using ",[169,3125,712],{}," and ",[169,3128,3129],{},"Google Gemini",". You will walk through each step comprehensively, ensuring a solid understanding of both the theoretical and practical aspects of implementing Multimodal LLM and RAG applications.",[73,3132,3133,3134,3126,3137,3139,3140,3143,3144,694],{},"You'll explore how to leverage the capabilities of ",[169,3135,3136],{},"Gemini 1.5 Flash",[169,3138,712],{}," together. 
If you're interested in building RAG pipelines with OpenAI, we also have an article on ",[169,3141,3142],{},"Multimodal RAG using GPT-4o",", which you can check out ",[77,3145,3147],{"href":3146},"\u002Fdevelopers\u002Ftemplates\u002Frag\u002Fmultimodal-rag","here",[73,3149,3150,3151,694],{},"If you want to skip the explanations, you can directly find the code ",[77,3152,3147],{"href":3153},"#hands-on-multimodal-rag-with-google-gemini",[140,3155,3157],{"id":3156},"what-this-article-will-cover","What this article will cover:",[145,3159,3160,3163,3166,3169,3172,3175,3178,3181],{},[148,3161,3162],{},"What is Retrieval-Augmented Generation (RAG)?",[148,3164,3165],{},"Multimodality in LLMs",[148,3167,3168],{},"Why is Multimodal RAG (MM-RAG) Needed?",[148,3170,3171],{},"What is Multimodal RAG and Use Cases?",[148,3173,3174],{},"Gemini Models",[148,3176,3177],{},"Release of Gemini 1.5 and its impact on RAG architectures",[148,3179,3180],{},"Comparing LlamaIndex and Pathway",[148,3182,3183],{},"Hands-on Multimodal RAG with Google Gemini",[140,3185,3187],{"id":3186},"foundational-concepts","Foundational Concepts",[3189,3190,3192],"h3",{"id":3191},"why-is-multimodal-rag-needed","Why is Multimodal Rag needed?",[73,3194,3195,3198],{},[169,3196,3197],{},"Retrieval-Augmented Generation (RAG)"," enhances large language models by incorporating external knowledge sources before generating responses. This approach ensures relevant and accurate output. In today's data-rich world, documents often combine text and images to convey information comprehensively. However, most Retrieval Augmented Generation (RAG) systems overlook the valuable insights locked within images. 
As Multimodal Large Language Models (LLMs) gain prominence, it's crucial to explore how we can leverage visual content alongside text in RAG, unlocking a deeper understanding of the information landscape.",[73,3200,3201,3204],{},[169,3202,3203],{},"Multimodal RAG"," is an advanced form of Retrieval-Augmented Generation (RAG) that goes beyond text to incorporate various data types like images, charts, and tables. This expanded capability allows for a deeper understanding of complex information, leading to more accurate and informative outputs.",[3206,3207,3209],"h4",{"id":3208},"two-options-for-multimodal-rag","Two options for Multimodal RAG",[665,3211,3212,3218],{},[148,3213,3214,3217],{},[169,3215,3216],{},"Multimodal Embeddings"," -\nThe multimodal embeddings model generates vectors based on the input you provide, which can include a combination of image, text, and video data. The image embedding vector and text embedding vector are in the same semantic space with the same dimensionality. 
Consequently, these vectors can be used interchangeably for use cases like searching image by text, or searching video by image.\nUtilize multimodal embeddings to integrate text and images, retrieve relevant content through similarity search, and then provide both the raw image and text chunks to a multimodal LLM for answer synthesis.",[148,3219,3220,3223],{},[169,3221,3222],{},"Text Embeddings"," -\nGenerate text summaries of images using a multimodal LLM, embed and retrieve the text, and then pass the text chunks to the LLM for answer synthesis.",[3206,3225,3227],{"id":3226},"comparing-text-based-and-multimodal-rag","Comparing text-based and multimodal RAG",[73,3229,3230],{},"Multimodal RAG offers several advantages over text-based RAG:",[145,3232,3233,3239],{},[148,3234,3235,3238],{},[169,3236,3237],{},"Enhanced knowledge access",": Multimodal RAG can access and process both textual and visual information, providing a richer and more comprehensive knowledge base for the LLM.",[148,3240,3241,3244],{},[169,3242,3243],{},"Improved reasoning capabilities",": By incorporating visual cues, multimodal RAG can make better informed inferences across different types of data modalities.",[3206,3246,3248],{"id":3247},"key-advantages-of-mm-rag","Key Advantages of MM-RAG:",[145,3250,3251,3254,3257],{},[148,3252,3253],{},"Comprehensive Understanding: Processes multiple data formats for a better picture.",[148,3255,3256],{},"Improved Performance: Visual data enhances efficiency in complex tasks.",[148,3258,3259],{},"Versatile Applications: Useful in finance, healthcare, scientific research, and more.",[3189,3261,3174],{"id":3262},"gemini-models",[73,3264,3265,3268],{},[169,3266,3267],{},"Gemini"," is Google's most capable and general AI model to date. 
Google has released several Gemini model variants, each tailored for different use cases and performance requirements.",[3206,3270,3272],{"id":3271},"main-gemini-models","Main Gemini Models:",[145,3274,3275,3278,3281,3284],{},[148,3276,3277],{},"Gemini Ultra: The most powerful and advanced model, capable of handling complex tasks and offering state-of-the-art performance.",[148,3279,3280],{},"Gemini Pro: A versatile model that balances performance and efficiency, suitable for a wide range of applications.",[148,3282,3283],{},"Gemini Advanced: Designed for a broader set of tasks, offering a good balance of capabilities.",[148,3285,3286],{},"Gemini Lite: A smaller, more efficient model focused on speed and responsiveness, ideal for resource-constrained environments.",[73,3288,3289],{},"Additional Variants:",[145,3291,3292,3295,3298,3301],{},[148,3293,3294],{},"Gemini 1.5 Flash: Optimized for high-volume, cost-effective applications.",[148,3296,3297],{},"Gemini 1.5 Pro: Offers a balance of performance and capabilities.",[148,3299,3300],{},"Gemini 1.0 Pro Vision: Includes vision capabilities for processing images and videos.",[148,3302,3303],{},"Gemini 1.0 Pro: Text-based model for general language tasks.",[3206,3305,3307],{"id":3306},"benefits-of-building-with-gemini","Benefits of Building with Gemini:",[73,3309,3310,3313],{},[169,3311,3312],{},"Free Credits",": Google Cloud offers new users up to $300 in free credits. 
This can be used to experiment with Gemini models and other Google Cloud services.\nYou can also seamlessly integrate MM-RAG applications with Google's Vertex AI platform for streamlined machine learning workflows.",[3189,3315,3177],{"id":3316},"release-of-gemini-15-and-its-impact-on-rag-architectures",[73,3318,3319],{},"The Gemini 1.5 Flash model, released on May 24, 2024, revolutionized AI with its enhanced speed, efficiency, cost-effectiveness, long context window, and multimodal reasoning capabilities.",[3206,3321,3323],{"id":3322},"did-google-gemini-15-kill-the-need-of-rag","Did Google Gemini 1.5 Kill the need of RAG?",[73,3325,3326,3327,3330],{},"In one word ",[169,3328,3329],{},"“No”",". Gemini 1.5, with a 1M context length window, has sparked a new debate about whether RAG (Retrieval Augmented Generation) is still relevant or not. LLMs commonly struggle with hallucination. To address this challenge, two solutions were introduced, one involving an increased context window and the other utilizing RAG. Gemini 1.5 outperforms Claude 2.1 and GPT-4 Turbo as it can assimilate entire code bases, process over 100 papers, and various documents, but it surely hasn’t killed RAG.",[73,3332,3333,3334,694],{},"RAG leverages your private knowledge database for effective Q&A while ensuring the security of sensitive information like trade secrets, confidential IP, GDPR-protected data, and internal documents. For more detailed insights explore our article on Private RAG with Connected Data Sources using Mistral, Ollama, and Pathway ",[77,3335,3147],{"href":3336},"\u002Fdevelopers\u002Ftemplates\u002Frag\u002Fprivate-rag-ollama-mistral",[73,3338,3339],{},"Additionally in traditional RAG pipelines, you can enhance performance by tweaking the retrieval process, changing the embedding model, adjusting chunking strategies, or improving source data. 
However, with a \"stuff-the-context-window-1M-tokens\" strategy, your only option is to improve the source data since all data is given to the model within the token limit. Additionally the context window may be filled with many relevant facts, but 40% or more of them are “lost” to the model. If you want to make sure the model is actually using the context you are sending it, you are best off curating it first and only sending the most relevant context. In other words, doing traditional RAG.",[73,3341,3342],{},"Here in this template you will use the Gemini 1.5 Flash but you can also use other multimodal models by gemini accordingly.",[3098,3344],{"alt":3345,"src":3346},"Gemini 1.5 flash overview","\u002Fassets\u002Fcontent\u002Fshowcases\u002Fgemini_rag\u002Fgemini1.5flashtable.png",[3189,3348,3350],{"id":3349},"multimodality-with-gemini-15-flash","Multimodality with Gemini-1.5-Flash",[73,3352,3353],{},"Gemini 1.5 Flash is the newest addition to the Gemini family of large language models, and it’s specifically designed to be fast, efficient, and cost-effective for high-volume tasks. 
This is achieved by being a lighter model than the Gemini 1.5 Pro.",[73,3355,3356],{},"According to the paper from Google DeepMind, Gemini 1.5 Flash is “a more lightweight variant designed for efficiency with minimal regression in quality” and uses the transformer decoder model architecture “and multimodal capabilities as Gemini 1.5 Pro, designed for efficient utilization of tensor processing units (TPUs) with lower latency for model serving.”",[3189,3358,3360],{"id":3359},"gemini-15-flash-key-features","Gemini 1.5 Flash: Key Features",[145,3362,3363,3369,3375,3381,3387],{},[148,3364,3365,3368],{},[169,3366,3367],{},"Speed and Efficiency",": Fastest Gemini model at 60 tokens\u002Fsecond, ideal for real-time tasks, reducing costs by delaying autoscaling.",[148,3370,3371,3374],{},[169,3372,3373],{},"Cost-Effective",": 1\u002F10 the price of Gemini 1.5 Pro and cheaper than GPT-3.5.",[148,3376,3377,3380],{},[169,3378,3379],{},"Long Context Window",": Processes up to one million tokens, handling one hour of video, 11 hours of audio, or 700,000 words without losing accuracy.",[148,3382,3383,3386],{},[169,3384,3385],{},"Multimodal Reasoning",": Understands text, images, audio, video, PDFs, and tables. 
Supports function calling and real-time data access.",[148,3388,3389,3392],{},[169,3390,3391],{},"Great Performance",": High performance with large context windows, excelling in long-document QA, long-video QA, and long-context ASR.",[3098,3394],{"alt":3345,"src":3395},"\u002Fassets\u002Fcontent\u002Fshowcases\u002Fgemini_rag\u002Fgemini1.5flashdetails.png",[140,3397,3399],{"id":3398},"hands-on-multimodal-rag-with-google-gemini","Hands on Multimodal RAG with Google Gemini",[73,3401,3402],{},[597,3403],{"alt":3404,"src":3405},"Gemini RAG overview","\u002Fassets\u002Fcontent\u002Fshowcases\u002Fgemini_rag\u002FRAG_diagram.png",[3189,3407,3409],{"id":3408},"step-1-installation","Step 1: Installation",[73,3411,3412,3413,3416],{},"First, we need to install the required packages: pathway",[1291,3414,3415],{},"all",", litellm==1.40.0 and google-generativeai.",[3418,3419,3423],"pre",{"className":3420,"code":3421,"language":3422,"meta":23,"style":23},"language-python shiki shiki-themes material-theme-palenight","!pip install 'pathway[all]>=0.14.0' litellm==1.40.0\n","python",[3061,3424,3425],{"__ignoreMap":23},[1291,3426,3429,3433,3437,3441,3443,3446,3449,3453,3455],{"class":3427,"line":3428},"line",1,[1291,3430,3432],{"class":3431},"s0W1g","!pip install ",[1291,3434,3436],{"class":3435},"sAklC","'",[1291,3438,3440],{"class":3439},"sfyAc","pathway[all]>=0.14.0",[1291,3442,3436],{"class":3435},[1291,3444,3445],{"class":3431}," litellm",[1291,3447,3448],{"class":3435},"==",[1291,3450,3452],{"class":3451},"sx098","1.40",[1291,3454,694],{"class":3435},[1291,3456,3458],{"class":3457},"s-wAU","0\n",[3189,3460,3462],{"id":3461},"step-2-imports-and-environment-setup","Step 2: Imports and Environment Setup",[73,3464,3465],{},"Next, we import the necessary libraries and set up the environment variables.",[3418,3467,3469],{"className":3420,"code":3468,"language":3422,"meta":23,"style":23},"import logging\nimport 
os\n",[3061,3470,3471,3480],{"__ignoreMap":23},[1291,3472,3473,3477],{"class":3427,"line":3428},[1291,3474,3476],{"class":3475},"s6cf3","import",[1291,3478,3479],{"class":3431}," logging\n",[1291,3481,3482,3484],{"class":3427,"line":24},[1291,3483,3476],{"class":3475},[1291,3485,3486],{"class":3431}," os\n",[3418,3488,3490],{"className":3420,"code":3489,"language":3422,"meta":23,"style":23},"import google.generativeai as genai\n",[3061,3491,3492],{"__ignoreMap":23},[1291,3493,3494,3496,3499,3501,3504,3507],{"class":3427,"line":3428},[1291,3495,3476],{"class":3475},[1291,3497,3498],{"class":3431}," google",[1291,3500,694],{"class":3435},[1291,3502,3503],{"class":3457},"generativeai",[1291,3505,3506],{"class":3475}," as",[1291,3508,3509],{"class":3431}," genai\n",[3418,3511,3513],{"className":3420,"code":3512,"language":3422,"meta":23,"style":23},"import litellm\n\nimport pathway as pw\n\nfrom pathway.udfs import DiskCache, ExponentialBackoffRetryStrategy\nfrom pathway.xpacks.llm import embedders, llms, parsers, prompts, splitters\nfrom pathway.xpacks.llm.question_answering import BaseRAGQuestionAnswerer\nfrom pathway.xpacks.llm.vector_store import VectorStoreServer\n\n# Set the logging level for LiteLLM to DEBUG\nos.environ[\"LITELLM_LOG\"] = \"DEBUG\"  # to help in debugging\n",[3061,3514,3515,3522,3527,3540,3545,3570,3612,3638,3663,3668,3675],{"__ignoreMap":23},[1291,3516,3517,3519],{"class":3427,"line":3428},[1291,3518,3476],{"class":3475},[1291,3520,3521],{"class":3431}," litellm\n",[1291,3523,3524],{"class":3427,"line":24},[1291,3525,3526],{"emptyLinePlaceholder":35},"\n",[1291,3528,3529,3531,3534,3537],{"class":3427,"line":675},[1291,3530,3476],{"class":3475},[1291,3532,3533],{"class":3431}," pathway ",[1291,3535,3536],{"class":3475},"as",[1291,3538,3539],{"class":3431}," 
pw\n",[1291,3541,3543],{"class":3427,"line":3542},4,[1291,3544,3526],{"emptyLinePlaceholder":35},[1291,3546,3548,3551,3554,3556,3559,3561,3564,3567],{"class":3427,"line":3547},5,[1291,3549,3550],{"class":3475},"from",[1291,3552,3553],{"class":3431}," pathway",[1291,3555,694],{"class":3435},[1291,3557,3558],{"class":3431},"udfs ",[1291,3560,3476],{"class":3475},[1291,3562,3563],{"class":3431}," DiskCache",[1291,3565,3566],{"class":3435},",",[1291,3568,3569],{"class":3431}," ExponentialBackoffRetryStrategy\n",[1291,3571,3573,3575,3577,3579,3582,3584,3587,3589,3592,3594,3597,3599,3602,3604,3607,3609],{"class":3427,"line":3572},6,[1291,3574,3550],{"class":3475},[1291,3576,3553],{"class":3431},[1291,3578,694],{"class":3435},[1291,3580,3581],{"class":3431},"xpacks",[1291,3583,694],{"class":3435},[1291,3585,3586],{"class":3431},"llm ",[1291,3588,3476],{"class":3475},[1291,3590,3591],{"class":3431}," embedders",[1291,3593,3566],{"class":3435},[1291,3595,3596],{"class":3431}," llms",[1291,3598,3566],{"class":3435},[1291,3600,3601],{"class":3431}," parsers",[1291,3603,3566],{"class":3435},[1291,3605,3606],{"class":3431}," prompts",[1291,3608,3566],{"class":3435},[1291,3610,3611],{"class":3431}," splitters\n",[1291,3613,3615,3617,3619,3621,3623,3625,3628,3630,3633,3635],{"class":3427,"line":3614},7,[1291,3616,3550],{"class":3475},[1291,3618,3553],{"class":3431},[1291,3620,694],{"class":3435},[1291,3622,3581],{"class":3431},[1291,3624,694],{"class":3435},[1291,3626,3627],{"class":3431},"llm",[1291,3629,694],{"class":3435},[1291,3631,3632],{"class":3431},"question_answering ",[1291,3634,3476],{"class":3475},[1291,3636,3637],{"class":3431}," 
BaseRAGQuestionAnswerer\n",[1291,3639,3641,3643,3645,3647,3649,3651,3653,3655,3658,3660],{"class":3427,"line":3640},8,[1291,3642,3550],{"class":3475},[1291,3644,3553],{"class":3431},[1291,3646,694],{"class":3435},[1291,3648,3581],{"class":3431},[1291,3650,694],{"class":3435},[1291,3652,3627],{"class":3431},[1291,3654,694],{"class":3435},[1291,3656,3657],{"class":3431},"vector_store ",[1291,3659,3476],{"class":3475},[1291,3661,3662],{"class":3431}," VectorStoreServer\n",[1291,3664,3666],{"class":3427,"line":3665},9,[1291,3667,3526],{"emptyLinePlaceholder":35},[1291,3669,3671],{"class":3427,"line":3670},10,[1291,3672,3674],{"class":3673},"saEQR","# Set the logging level for LiteLLM to DEBUG\n",[1291,3676,3678,3681,3683,3686,3689,3692,3695,3697,3700,3703,3706,3709,3711],{"class":3427,"line":3677},11,[1291,3679,3680],{"class":3431},"os",[1291,3682,694],{"class":3435},[1291,3684,3685],{"class":3457},"environ",[1291,3687,3688],{"class":3435},"[",[1291,3690,3691],{"class":3435},"\"",[1291,3693,3694],{"class":3439},"LITELLM_LOG",[1291,3696,3691],{"class":3435},[1291,3698,3699],{"class":3435},"]",[1291,3701,3702],{"class":3435}," =",[1291,3704,3705],{"class":3435}," \"",[1291,3707,3708],{"class":3439},"DEBUG",[1291,3710,3691],{"class":3435},[1291,3712,3713],{"class":3673},"  # to help in debugging\n",[3189,3715,3717],{"id":3716},"step-3-api-key-setup-and-license-key-setup","Step 3: API Key Setup and License Key Setup",[73,3719,3720],{},"Set up the API key and the Pathway license key:",[3418,3722,3724],{"className":3420,"code":3723,"language":3422,"meta":23,"style":23},"# Api key setup\nGEMINI_API_KEY = \"Paste your Gemini API Key here\"\n\nos.environ[\"GEMINI_API_KEY\"] = GEMINI_API_KEY\nos.environ[\"TESSDATA_PREFIX\"] = \"\u002Fusr\u002Fshare\u002Ftesseract\u002Ftessdata\u002F\"\ngenai.configure(api_key=GEMINI_API_KEY)\n\n# License key setup\npw.set_license_key(\"demo-license-key-with-telemetry\")\n\nlogging.basicConfig(\n    level=logging.INFO, format=\"%(asctime)s - 
%(levelname)s - %(message)s\"\n)\n",[3061,3725,3726,3731,3747,3751,3775,3803,3828,3832,3837,3858,3862,3875,3914],{"__ignoreMap":23},[1291,3727,3728],{"class":3427,"line":3428},[1291,3729,3730],{"class":3673},"# Api key setup\n",[1291,3732,3733,3736,3739,3741,3744],{"class":3427,"line":24},[1291,3734,3735],{"class":3431},"GEMINI_API_KEY ",[1291,3737,3738],{"class":3435},"=",[1291,3740,3705],{"class":3435},[1291,3742,3743],{"class":3439},"Paste your Gemini API Key here",[1291,3745,3746],{"class":3435},"\"\n",[1291,3748,3749],{"class":3427,"line":675},[1291,3750,3526],{"emptyLinePlaceholder":35},[1291,3752,3753,3755,3757,3759,3761,3763,3766,3768,3770,3772],{"class":3427,"line":3542},[1291,3754,3680],{"class":3431},[1291,3756,694],{"class":3435},[1291,3758,3685],{"class":3457},[1291,3760,3688],{"class":3435},[1291,3762,3691],{"class":3435},[1291,3764,3765],{"class":3439},"GEMINI_API_KEY",[1291,3767,3691],{"class":3435},[1291,3769,3699],{"class":3435},[1291,3771,3702],{"class":3435},[1291,3773,3774],{"class":3431}," 
GEMINI_API_KEY\n",[1291,3776,3777,3779,3781,3783,3785,3787,3790,3792,3794,3796,3798,3801],{"class":3427,"line":3547},[1291,3778,3680],{"class":3431},[1291,3780,694],{"class":3435},[1291,3782,3685],{"class":3457},[1291,3784,3688],{"class":3435},[1291,3786,3691],{"class":3435},[1291,3788,3789],{"class":3439},"TESSDATA_PREFIX",[1291,3791,3691],{"class":3435},[1291,3793,3699],{"class":3435},[1291,3795,3702],{"class":3435},[1291,3797,3705],{"class":3435},[1291,3799,3800],{"class":3439},"\u002Fusr\u002Fshare\u002Ftesseract\u002Ftessdata\u002F",[1291,3802,3746],{"class":3435},[1291,3804,3805,3808,3810,3814,3817,3821,3823,3825],{"class":3427,"line":3572},[1291,3806,3807],{"class":3431},"genai",[1291,3809,694],{"class":3435},[1291,3811,3813],{"class":3812},"sdLwU","configure",[1291,3815,3816],{"class":3435},"(",[1291,3818,3820],{"class":3819},"s7ZW3","api_key",[1291,3822,3738],{"class":3435},[1291,3824,3765],{"class":3812},[1291,3826,3827],{"class":3435},")\n",[1291,3829,3830],{"class":3427,"line":3614},[1291,3831,3526],{"emptyLinePlaceholder":35},[1291,3833,3834],{"class":3427,"line":3640},[1291,3835,3836],{"class":3673},"# License key 
setup\n",[1291,3838,3839,3842,3844,3847,3849,3851,3854,3856],{"class":3427,"line":3665},[1291,3840,3841],{"class":3431},"pw",[1291,3843,694],{"class":3435},[1291,3845,3846],{"class":3812},"set_license_key",[1291,3848,3816],{"class":3435},[1291,3850,3691],{"class":3435},[1291,3852,3853],{"class":3439},"demo-license-key-with-telemetry",[1291,3855,3691],{"class":3435},[1291,3857,3827],{"class":3435},[1291,3859,3860],{"class":3427,"line":3670},[1291,3861,3526],{"emptyLinePlaceholder":35},[1291,3863,3864,3867,3869,3872],{"class":3427,"line":3677},[1291,3865,3866],{"class":3431},"logging",[1291,3868,694],{"class":3435},[1291,3870,3871],{"class":3812},"basicConfig",[1291,3873,3874],{"class":3435},"(\n",[1291,3876,3878,3881,3883,3885,3887,3890,3892,3895,3897,3899,3902,3904,3907,3909,3912],{"class":3427,"line":3877},12,[1291,3879,3880],{"class":3819},"    level",[1291,3882,3738],{"class":3435},[1291,3884,3866],{"class":3812},[1291,3886,694],{"class":3435},[1291,3888,3889],{"class":3457},"INFO",[1291,3891,3566],{"class":3435},[1291,3893,3894],{"class":3819}," format",[1291,3896,3738],{"class":3435},[1291,3898,3691],{"class":3435},[1291,3900,3901],{"class":3451},"%(asctime)s",[1291,3903,2578],{"class":3439},[1291,3905,3906],{"class":3451},"%(levelname)s",[1291,3908,2578],{"class":3439},[1291,3910,3911],{"class":3451},"%(message)s",[1291,3913,3746],{"class":3435},[1291,3915,3917],{"class":3427,"line":3916},13,[1291,3918,3827],{"class":3435},[3189,3920,3922],{"id":3921},"step-4-upload-your-file","Step 4: Upload your file",[73,3924,3925,3926,3929],{},"Create a ",[3061,3927,3928],{},".\u002Fdata"," directory if it doesn't already exist. This is where the uploaded files will be stored. 
Then upload your pdf documents.",[73,3931,3932,3933,3936],{},"You can also omit this cell if you are running locally on your system - in that case create a ",[3061,3934,3935],{},"data"," folder in the current directory and copy the files and comment out this cell.",[3418,3938,3940],{"className":3420,"code":3939,"language":3422,"meta":23,"style":23},"!mkdir -p data\n",[3061,3941,3942],{"__ignoreMap":23},[1291,3943,3944,3947,3950],{"class":3427,"line":3428},[1291,3945,3946],{"class":3431},"!mkdir ",[1291,3948,3949],{"class":3435},"-",[1291,3951,3952],{"class":3431},"p data\n",[3418,3954,3956],{"className":3420,"code":3955,"language":3422,"meta":23,"style":23},"# Demo pdf for testing\n!wget -q -P .\u002Fdata\u002F https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fllm-app\u002Fraw\u002Fmain\u002Ftemplates\u002Fmultimodal_rag\u002Fdata\u002F20230203_alphabet_10K.pdf\n",[3061,3957,3958,3963],{"__ignoreMap":23},[1291,3959,3960],{"class":3427,"line":3428},[1291,3961,3962],{"class":3673},"# Demo pdf for testing\n",[1291,3964,3965,3968,3970,3973,3975,3978,3981,3983,3985,3988,3991,3994,3996,3999,4001,4004,4006,4008,4010,4013,4015,4018,4020,4023,4025,4028,4030,4033,4035,4037,4039,4042,4044],{"class":3427,"line":24},[1291,3966,3967],{"class":3431},"!wget ",[1291,3969,3949],{"class":3435},[1291,3971,3972],{"class":3431},"q ",[1291,3974,3949],{"class":3435},[1291,3976,3977],{"class":3431},"P ",[1291,3979,3980],{"class":3435},".\u002F",[1291,3982,3935],{"class":3431},[1291,3984,711],{"class":3435},[1291,3986,3987],{"class":3431}," 
https",[1291,3989,3990],{"class":3435},":\u002F\u002F",[1291,3992,3993],{"class":3431},"github",[1291,3995,694],{"class":3435},[1291,3997,3998],{"class":3457},"com",[1291,4000,711],{"class":3435},[1291,4002,4003],{"class":3431},"pathwaycom",[1291,4005,711],{"class":3435},[1291,4007,3627],{"class":3431},[1291,4009,3949],{"class":3435},[1291,4011,4012],{"class":3431},"app",[1291,4014,711],{"class":3435},[1291,4016,4017],{"class":3431},"raw",[1291,4019,711],{"class":3435},[1291,4021,4022],{"class":3431},"main",[1291,4024,711],{"class":3435},[1291,4026,4027],{"class":3431},"templates",[1291,4029,711],{"class":3435},[1291,4031,4032],{"class":3431},"multimodal_rag",[1291,4034,711],{"class":3435},[1291,4036,3935],{"class":3431},[1291,4038,711],{"class":3435},[1291,4040,4041],{"class":3431},"20230203_alphabet_10K",[1291,4043,694],{"class":3435},[1291,4045,4046],{"class":3457},"pdf\n",[3206,4048,4050],{"id":4049},"reading-pdf-data","Reading PDF Data",[73,4052,4053],{},"Next, we read the PDF data from a folder.",[3418,4055,4057],{"className":3420,"code":4056,"language":3422,"meta":23,"style":23},"# Read the PDF data\nfolder = pw.io.fs.read(\n    path=\".\u002Fdata\u002F\",\n    format=\"binary\",\n    with_metadata=True,\n)\nsources = [folder]  # you can add any other Pathway connector here!\n",[3061,4058,4059,4064,4091,4108,4124,4132,4136],{"__ignoreMap":23},[1291,4060,4061],{"class":3427,"line":3428},[1291,4062,4063],{"class":3673},"# Read the PDF data\n",[1291,4065,4066,4069,4071,4074,4076,4079,4081,4084,4086,4089],{"class":3427,"line":24},[1291,4067,4068],{"class":3431},"folder ",[1291,4070,3738],{"class":3435},[1291,4072,4073],{"class":3431}," 
pw",[1291,4075,694],{"class":3435},[1291,4077,4078],{"class":3457},"io",[1291,4080,694],{"class":3435},[1291,4082,4083],{"class":3457},"fs",[1291,4085,694],{"class":3435},[1291,4087,4088],{"class":3812},"read",[1291,4090,3874],{"class":3435},[1291,4092,4093,4096,4098,4100,4103,4105],{"class":3427,"line":675},[1291,4094,4095],{"class":3819},"    path",[1291,4097,3738],{"class":3435},[1291,4099,3691],{"class":3435},[1291,4101,4102],{"class":3439},".\u002Fdata\u002F",[1291,4104,3691],{"class":3435},[1291,4106,4107],{"class":3435},",\n",[1291,4109,4110,4113,4115,4117,4120,4122],{"class":3427,"line":3542},[1291,4111,4112],{"class":3819},"    format",[1291,4114,3738],{"class":3435},[1291,4116,3691],{"class":3435},[1291,4118,4119],{"class":3439},"binary",[1291,4121,3691],{"class":3435},[1291,4123,4107],{"class":3435},[1291,4125,4126,4129],{"class":3427,"line":3547},[1291,4127,4128],{"class":3819},"    with_metadata",[1291,4130,4131],{"class":3435},"=True,\n",[1291,4133,4134],{"class":3427,"line":3572},[1291,4135,3827],{"class":3435},[1291,4137,4138,4141,4143,4146,4149,4151],{"class":3427,"line":3614},[1291,4139,4140],{"class":3431},"sources ",[1291,4142,3738],{"class":3435},[1291,4144,4145],{"class":3435}," [",[1291,4147,4148],{"class":3431},"folder",[1291,4150,3699],{"class":3435},[1291,4152,4153],{"class":3673},"  # you can add any other Pathway connector here!\n",[3189,4155,4157],{"id":4156},"step-5-document-processing-and-question-answering-setup","Step 5: Document Processing and Question Answering Setup",[3206,4159,4161],{"id":4160},"setting-up-litellm-chat","Setting Up LiteLLM Chat",[73,4163,4164],{},"Set up a LiteLLM chat instance with retry and cache strategies:",[3418,4166,4168],{"className":3420,"code":4167,"language":3422,"meta":23,"style":23},"# Setup LiteLLM chat\nchat = llms.LiteLLMChat(\n    model=\"gemini\u002Fgemini-1.5-flash\",  # Model specified for LiteLLM\n    retry_strategy=ExponentialBackoffRetryStrategy(max_retries=6, backoff_factor=2.5),\n    
temperature=0.0,\n)\n",[3061,4169,4170,4175,4191,4210,4243,4255],{"__ignoreMap":23},[1291,4171,4172],{"class":3427,"line":3428},[1291,4173,4174],{"class":3673},"# Setup LiteLLM chat\n",[1291,4176,4177,4180,4182,4184,4186,4189],{"class":3427,"line":24},[1291,4178,4179],{"class":3431},"chat ",[1291,4181,3738],{"class":3435},[1291,4183,3596],{"class":3431},[1291,4185,694],{"class":3435},[1291,4187,4188],{"class":3812},"LiteLLMChat",[1291,4190,3874],{"class":3435},[1291,4192,4193,4196,4198,4200,4203,4205,4207],{"class":3427,"line":675},[1291,4194,4195],{"class":3819},"    model",[1291,4197,3738],{"class":3435},[1291,4199,3691],{"class":3435},[1291,4201,4202],{"class":3439},"gemini\u002Fgemini-1.5-flash",[1291,4204,3691],{"class":3435},[1291,4206,3566],{"class":3435},[1291,4208,4209],{"class":3673},"  # Model specified for LiteLLM\n",[1291,4211,4212,4215,4217,4220,4222,4225,4227,4230,4232,4235,4237,4240],{"class":3427,"line":3542},[1291,4213,4214],{"class":3819},"    retry_strategy",[1291,4216,3738],{"class":3435},[1291,4218,4219],{"class":3812},"ExponentialBackoffRetryStrategy",[1291,4221,3816],{"class":3435},[1291,4223,4224],{"class":3819},"max_retries",[1291,4226,3738],{"class":3435},[1291,4228,4229],{"class":3451},"6",[1291,4231,3566],{"class":3435},[1291,4233,4234],{"class":3819}," backoff_factor",[1291,4236,3738],{"class":3435},[1291,4238,4239],{"class":3451},"2.5",[1291,4241,4242],{"class":3435},"),\n",[1291,4244,4245,4248,4250,4253],{"class":3427,"line":3547},[1291,4246,4247],{"class":3819},"    temperature",[1291,4249,3738],{"class":3435},[1291,4251,4252],{"class":3451},"0.0",[1291,4254,4107],{"class":3435},[1291,4256,4257],{"class":3427,"line":3572},[1291,4258,3827],{"class":3435},[3206,4260,4262],{"id":4261},"setting-up-embedder","Setting Up Embedder",[73,4264,4265,4266,4269,4270,4273,4274,4277],{},"Let's utilize Gemini embedders. The ",[3061,4267,4268],{},"GeminiEmbedder"," class in Pathway provides an interface for interacting with Gemini embedders. 
It generates semantic embeddings with a specified model, providing methods for single items (",[3061,4271,4272],{},"embed","), batches (",[3061,4275,4276],{},"embed_batch","), and direct calls.",[3418,4279,4281],{"className":3420,"code":4280,"language":3422,"meta":23,"style":23},"# Setup embedder\nembedder = embedders.GeminiEmbedder(\n    model=\"models\u002Fembedding-001\",\n    retry_strategy=ExponentialBackoffRetryStrategy(max_retries=6, backoff_factor=2.5),\n)  # Specify embedder here\n",[3061,4282,4283,4288,4303,4318,4344],{"__ignoreMap":23},[1291,4284,4285],{"class":3427,"line":3428},[1291,4286,4287],{"class":3673},"# Setup embedder\n",[1291,4289,4290,4293,4295,4297,4299,4301],{"class":3427,"line":24},[1291,4291,4292],{"class":3431},"embedder ",[1291,4294,3738],{"class":3435},[1291,4296,3591],{"class":3431},[1291,4298,694],{"class":3435},[1291,4300,4268],{"class":3812},[1291,4302,3874],{"class":3435},[1291,4304,4305,4307,4309,4311,4314,4316],{"class":3427,"line":675},[1291,4306,4195],{"class":3819},[1291,4308,3738],{"class":3435},[1291,4310,3691],{"class":3435},[1291,4312,4313],{"class":3439},"models\u002Fembedding-001",[1291,4315,3691],{"class":3435},[1291,4317,4107],{"class":3435},[1291,4319,4320,4322,4324,4326,4328,4330,4332,4334,4336,4338,4340,4342],{"class":3427,"line":3542},[1291,4321,4214],{"class":3819},[1291,4323,3738],{"class":3435},[1291,4325,4219],{"class":3812},[1291,4327,3816],{"class":3435},[1291,4329,4224],{"class":3819},[1291,4331,3738],{"class":3435},[1291,4333,4229],{"class":3451},[1291,4335,3566],{"class":3435},[1291,4337,4234],{"class":3819},[1291,4339,3738],{"class":3435},[1291,4341,4239],{"class":3451},[1291,4343,4242],{"class":3435},[1291,4345,4346,4348],{"class":3427,"line":3547},[1291,4347,713],{"class":3435},[1291,4349,4350],{"class":3673},"  # Specify embedder here\n",[3206,4352,4354],{"id":4353},"setting-up-parser","Setting Up Parser",[73,4356,4357],{},"Next, we set up a parser for the document 
store.",[3418,4359,4361],{"className":3420,"code":4360,"language":3422,"meta":23,"style":23},"# Setup parser\ntable_args = {\n    \"parsing_algorithm\": \"llm\",  # for tables\n    \"llm\": chat,\n    \"prompt\": prompts.DEFAULT_MD_TABLE_PARSE_PROMPT,\n}\n\nimage_args = {\n    \"parsing_algorithm\": \"llm\",  # for images\n    \"llm\": chat,\n    \"prompt\": prompts.DEFAULT_IMAGE_PARSE_PROMPT,\n}\n\nparser = parsers.DoclingParser(multimodal_llm=chat)\n",[3061,4362,4363,4368,4378,4402,4417,4437,4442,4446,4455,4476,4490,4509,4513,4517],{"__ignoreMap":23},[1291,4364,4365],{"class":3427,"line":3428},[1291,4366,4367],{"class":3673},"# Setup parser\n",[1291,4369,4370,4373,4375],{"class":3427,"line":24},[1291,4371,4372],{"class":3431},"table_args ",[1291,4374,3738],{"class":3435},[1291,4376,4377],{"class":3435}," {\n",[1291,4379,4380,4383,4386,4388,4391,4393,4395,4397,4399],{"class":3427,"line":675},[1291,4381,4382],{"class":3435},"    \"",[1291,4384,4385],{"class":3439},"parsing_algorithm",[1291,4387,3691],{"class":3435},[1291,4389,4390],{"class":3435},":",[1291,4392,3705],{"class":3435},[1291,4394,3627],{"class":3439},[1291,4396,3691],{"class":3435},[1291,4398,3566],{"class":3435},[1291,4400,4401],{"class":3673},"  # for tables\n",[1291,4403,4404,4406,4408,4410,4412,4415],{"class":3427,"line":3542},[1291,4405,4382],{"class":3435},[1291,4407,3627],{"class":3439},[1291,4409,3691],{"class":3435},[1291,4411,4390],{"class":3435},[1291,4413,4414],{"class":3431}," 
chat",[1291,4416,4107],{"class":3435},[1291,4418,4419,4421,4424,4426,4428,4430,4432,4435],{"class":3427,"line":3547},[1291,4420,4382],{"class":3435},[1291,4422,4423],{"class":3439},"prompt",[1291,4425,3691],{"class":3435},[1291,4427,4390],{"class":3435},[1291,4429,3606],{"class":3431},[1291,4431,694],{"class":3435},[1291,4433,4434],{"class":3457},"DEFAULT_MD_TABLE_PARSE_PROMPT",[1291,4436,4107],{"class":3435},[1291,4438,4439],{"class":3427,"line":3572},[1291,4440,4441],{"class":3435},"}\n",[1291,4443,4444],{"class":3427,"line":3614},[1291,4445,3526],{"emptyLinePlaceholder":35},[1291,4447,4448,4451,4453],{"class":3427,"line":3640},[1291,4449,4450],{"class":3431},"image_args ",[1291,4452,3738],{"class":3435},[1291,4454,4377],{"class":3435},[1291,4456,4457,4459,4461,4463,4465,4467,4469,4471,4473],{"class":3427,"line":3665},[1291,4458,4382],{"class":3435},[1291,4460,4385],{"class":3439},[1291,4462,3691],{"class":3435},[1291,4464,4390],{"class":3435},[1291,4466,3705],{"class":3435},[1291,4468,3627],{"class":3439},[1291,4470,3691],{"class":3435},[1291,4472,3566],{"class":3435},[1291,4474,4475],{"class":3673},"  # for 
images\n",[1291,4477,4478,4480,4482,4484,4486,4488],{"class":3427,"line":3670},[1291,4479,4382],{"class":3435},[1291,4481,3627],{"class":3439},[1291,4483,3691],{"class":3435},[1291,4485,4390],{"class":3435},[1291,4487,4414],{"class":3431},[1291,4489,4107],{"class":3435},[1291,4491,4492,4494,4496,4498,4500,4502,4504,4507],{"class":3427,"line":3677},[1291,4493,4382],{"class":3435},[1291,4495,4423],{"class":3439},[1291,4497,3691],{"class":3435},[1291,4499,4390],{"class":3435},[1291,4501,3606],{"class":3431},[1291,4503,694],{"class":3435},[1291,4505,4506],{"class":3457},"DEFAULT_IMAGE_PARSE_PROMPT",[1291,4508,4107],{"class":3435},[1291,4510,4511],{"class":3427,"line":3877},[1291,4512,4441],{"class":3435},[1291,4514,4515],{"class":3427,"line":3916},[1291,4516,3526],{"emptyLinePlaceholder":35},[1291,4518,4520,4523,4525,4527,4529,4532,4534,4537,4539,4542],{"class":3427,"line":4519},14,[1291,4521,4522],{"class":3431},"parser ",[1291,4524,3738],{"class":3435},[1291,4526,3601],{"class":3431},[1291,4528,694],{"class":3435},[1291,4530,4531],{"class":3812},"DoclingParser",[1291,4533,3816],{"class":3435},[1291,4535,4536],{"class":3819},"multimodal_llm",[1291,4538,3738],{"class":3435},[1291,4540,4541],{"class":3812},"chat",[1291,4543,3827],{"class":3435},[3206,4545,4547],{"id":4546},"setting-up-document-store","Setting Up Document Store",[73,4549,4550],{},"We will set up the document store with the sources, embedder, and parser.",[3418,4552,4554],{"className":3420,"code":4553,"language":3422,"meta":23,"style":23},"# Setup document store\n# splitter = splitters.TokenCountSplitter()\ndoc_store = VectorStoreServer(\n    *sources,\n    embedder=embedder,\n    splitter=splitter,\n    parser=parser,\n)\n",[3061,4555,4556,4561,4566,4578,4588,4600,4612,4624],{"__ignoreMap":23},[1291,4557,4558],{"class":3427,"line":3428},[1291,4559,4560],{"class":3673},"# Setup document store\n",[1291,4562,4563],{"class":3427,"line":24},[1291,4564,4565],{"class":3673},"# splitter = 
splitters.TokenCountSplitter()\n",[1291,4567,4568,4571,4573,4576],{"class":3427,"line":675},[1291,4569,4570],{"class":3431},"doc_store ",[1291,4572,3738],{"class":3435},[1291,4574,4575],{"class":3812}," VectorStoreServer",[1291,4577,3874],{"class":3435},[1291,4579,4580,4583,4586],{"class":3427,"line":3542},[1291,4581,4582],{"class":3435},"    *",[1291,4584,4585],{"class":3812},"sources",[1291,4587,4107],{"class":3435},[1291,4589,4590,4593,4595,4598],{"class":3427,"line":3547},[1291,4591,4592],{"class":3819},"    embedder",[1291,4594,3738],{"class":3435},[1291,4596,4597],{"class":3812},"embedder",[1291,4599,4107],{"class":3435},[1291,4601,4602,4605,4607,4610],{"class":3427,"line":3572},[1291,4603,4604],{"class":3819},"    splitter",[1291,4606,3738],{"class":3435},[1291,4608,4609],{"class":3812},"splitter",[1291,4611,4107],{"class":3435},[1291,4613,4614,4617,4619,4622],{"class":3427,"line":3614},[1291,4615,4616],{"class":3819},"    parser",[1291,4618,3738],{"class":3435},[1291,4620,4621],{"class":3812},"parser",[1291,4623,4107],{"class":3435},[1291,4625,4626],{"class":3427,"line":3640},[1291,4627,3827],{"class":3435},[3189,4629,4631],{"id":4630},"step-6-setting-up-question-answerer-application","Step 6: Setting Up Question Answerer Application",[73,4633,4634],{},"We will set up the question answerer application using the LiteLLM-based chat object.",[3418,4636,4638],{"className":3420,"code":4637,"language":3422,"meta":23,"style":23},"# Setup question answerer application\napp = BaseRAGQuestionAnswerer(\n        llm=chat,  # Using the LiteLLM-based chat object\n        indexer=doc_store, search_topk=2,\n        short_prompt_template=prompts.prompt_qa)\n",[3061,4639,4640,4645,4657,4671,4693],{"__ignoreMap":23},[1291,4641,4642],{"class":3427,"line":3428},[1291,4643,4644],{"class":3673},"# Setup question answerer application\n",[1291,4646,4647,4650,4652,4655],{"class":3427,"line":24},[1291,4648,4649],{"class":3431},"app 
",[1291,4651,3738],{"class":3435},[1291,4653,4654],{"class":3812}," BaseRAGQuestionAnswerer",[1291,4656,3874],{"class":3435},[1291,4658,4659,4662,4664,4666,4668],{"class":3427,"line":675},[1291,4660,4661],{"class":3819},"        llm",[1291,4663,3738],{"class":3435},[1291,4665,4541],{"class":3812},[1291,4667,3566],{"class":3435},[1291,4669,4670],{"class":3673},"  # Using the LiteLLM-based chat object\n",[1291,4672,4673,4676,4678,4681,4683,4686,4688,4691],{"class":3427,"line":3542},[1291,4674,4675],{"class":3819},"        indexer",[1291,4677,3738],{"class":3435},[1291,4679,4680],{"class":3812},"doc_store",[1291,4682,3566],{"class":3435},[1291,4684,4685],{"class":3819}," search_topk",[1291,4687,3738],{"class":3435},[1291,4689,4690],{"class":3451},"2",[1291,4692,4107],{"class":3435},[1291,4694,4695,4698,4700,4703,4705,4708],{"class":3427,"line":3547},[1291,4696,4697],{"class":3819},"        short_prompt_template",[1291,4699,3738],{"class":3435},[1291,4701,4702],{"class":3812},"prompts",[1291,4704,694],{"class":3435},[1291,4706,4707],{"class":3457},"prompt_qa",[1291,4709,3827],{"class":3435},[3206,4711,4713],{"id":4712},"building-and-running-the-server","Building and Running the Server",[73,4715,4716],{},"Finally, we build and run the server.",[3418,4718,4720],{"className":3420,"code":4719,"language":3422,"meta":23,"style":23},"# Build and run the server\napp_host = \"0.0.0.0\"\napp_port = 8000\napp.build_server(host=app_host, port=app_port)\n",[3061,4721,4722,4727,4741,4751],{"__ignoreMap":23},[1291,4723,4724],{"class":3427,"line":3428},[1291,4725,4726],{"class":3673},"# Build and run the server\n",[1291,4728,4729,4732,4734,4736,4739],{"class":3427,"line":24},[1291,4730,4731],{"class":3431},"app_host ",[1291,4733,3738],{"class":3435},[1291,4735,3705],{"class":3435},[1291,4737,4738],{"class":3439},"0.0.0.0",[1291,4740,3746],{"class":3435},[1291,4742,4743,4746,4748],{"class":3427,"line":675},[1291,4744,4745],{"class":3431},"app_port 
",[1291,4747,3738],{"class":3435},[1291,4749,4750],{"class":3451}," 8000\n",[1291,4752,4753,4755,4757,4760,4762,4765,4767,4770,4772,4775,4777,4780],{"class":3427,"line":3542},[1291,4754,4012],{"class":3431},[1291,4756,694],{"class":3435},[1291,4758,4759],{"class":3812},"build_server",[1291,4761,3816],{"class":3435},[1291,4763,4764],{"class":3819},"host",[1291,4766,3738],{"class":3435},[1291,4768,4769],{"class":3812},"app_host",[1291,4771,3566],{"class":3435},[1291,4773,4774],{"class":3819}," port",[1291,4776,3738],{"class":3435},[1291,4778,4779],{"class":3812},"app_port",[1291,4781,3827],{"class":3435},[3418,4783,4785],{"className":3420,"code":4784,"language":3422,"meta":23,"style":23},"import threading\nt = threading.Thread(target=app.run_server, name=\"BaseRAGQuestionAnswerer\")\nt.daemon = True\nthr = t.start()\n",[3061,4786,4787,4794,4839,4854],{"__ignoreMap":23},[1291,4788,4789,4791],{"class":3427,"line":3428},[1291,4790,3476],{"class":3475},[1291,4792,4793],{"class":3431}," threading\n",[1291,4795,4796,4799,4801,4804,4806,4809,4811,4814,4816,4818,4820,4823,4825,4828,4830,4832,4835,4837],{"class":3427,"line":24},[1291,4797,4798],{"class":3431},"t ",[1291,4800,3738],{"class":3435},[1291,4802,4803],{"class":3431}," threading",[1291,4805,694],{"class":3435},[1291,4807,4808],{"class":3812},"Thread",[1291,4810,3816],{"class":3435},[1291,4812,4813],{"class":3819},"target",[1291,4815,3738],{"class":3435},[1291,4817,4012],{"class":3812},[1291,4819,694],{"class":3435},[1291,4821,4822],{"class":3457},"run_server",[1291,4824,3566],{"class":3435},[1291,4826,4827],{"class":3819}," 
name",[1291,4829,3738],{"class":3435},[1291,4831,3691],{"class":3435},[1291,4833,4834],{"class":3439},"BaseRAGQuestionAnswerer",[1291,4836,3691],{"class":3435},[1291,4838,3827],{"class":3435},[1291,4840,4841,4844,4846,4849,4851],{"class":3427,"line":675},[1291,4842,4843],{"class":3431},"t",[1291,4845,694],{"class":3435},[1291,4847,4848],{"class":3457},"daemon",[1291,4850,3702],{"class":3435},[1291,4852,4853],{"class":3435}," True\n",[1291,4855,4856,4859,4861,4864,4866,4869],{"class":3427,"line":3542},[1291,4857,4858],{"class":3431},"thr ",[1291,4860,3738],{"class":3435},[1291,4862,4863],{"class":3431}," t",[1291,4865,694],{"class":3435},[1291,4867,4868],{"class":3812},"start",[1291,4870,4871],{"class":3435},"()\n",[3418,4873,4875],{"className":3420,"code":4874,"language":3422,"meta":23,"style":23},"from pathway.xpacks.llm.question_answering import RAGClient\n\n# Initialize the RAG client\nclient = RAGClient(host=\"0.0.0.0\", port=8000)\n",[3061,4876,4877,4900,4904,4909],{"__ignoreMap":23},[1291,4878,4879,4881,4883,4885,4887,4889,4891,4893,4895,4897],{"class":3427,"line":3428},[1291,4880,3550],{"class":3475},[1291,4882,3553],{"class":3431},[1291,4884,694],{"class":3435},[1291,4886,3581],{"class":3431},[1291,4888,694],{"class":3435},[1291,4890,3627],{"class":3431},[1291,4892,694],{"class":3435},[1291,4894,3632],{"class":3431},[1291,4896,3476],{"class":3475},[1291,4898,4899],{"class":3431}," RAGClient\n",[1291,4901,4902],{"class":3427,"line":24},[1291,4903,3526],{"emptyLinePlaceholder":35},[1291,4905,4906],{"class":3427,"line":675},[1291,4907,4908],{"class":3673},"# Initialize the RAG client\n",[1291,4910,4911,4914,4916,4919,4921,4923,4925,4927,4929,4931,4933,4935,4937,4940],{"class":3427,"line":3542},[1291,4912,4913],{"class":3431},"client ",[1291,4915,3738],{"class":3435},[1291,4917,4918],{"class":3812}," 
RAGClient",[1291,4920,3816],{"class":3435},[1291,4922,4764],{"class":3819},[1291,4924,3738],{"class":3435},[1291,4926,3691],{"class":3435},[1291,4928,4738],{"class":3439},[1291,4930,3691],{"class":3435},[1291,4932,3566],{"class":3435},[1291,4934,4774],{"class":3819},[1291,4936,3738],{"class":3435},[1291,4938,4939],{"class":3451},"8000",[1291,4941,3827],{"class":3435},[3418,4943,4945],{"className":3420,"code":4944,"language":3422,"meta":23,"style":23},"# Example usage\n\nresponse = client.answer(\"What is the Total Stockholders' equity as of December 31, 2022?\")\nprint(response)\n\n",[3061,4946,4947,4952,4956,4982],{"__ignoreMap":23},[1291,4948,4949],{"class":3427,"line":3428},[1291,4950,4951],{"class":3673},"# Example usage\n",[1291,4953,4954],{"class":3427,"line":24},[1291,4955,3526],{"emptyLinePlaceholder":35},[1291,4957,4958,4961,4963,4966,4968,4971,4973,4975,4978,4980],{"class":3427,"line":675},[1291,4959,4960],{"class":3431},"response ",[1291,4962,3738],{"class":3435},[1291,4964,4965],{"class":3431}," client",[1291,4967,694],{"class":3435},[1291,4969,4970],{"class":3812},"answer",[1291,4972,3816],{"class":3435},[1291,4974,3691],{"class":3435},[1291,4976,4977],{"class":3439},"What is the Total Stockholders' equity as of December 31, 2022?",[1291,4979,3691],{"class":3435},[1291,4981,3827],{"class":3435},[1291,4983,4984,4987,4989,4992],{"class":3427,"line":3542},[1291,4985,4986],{"class":3812},"print",[1291,4988,3816],{"class":3435},[1291,4990,4991],{"class":3812},"response",[1291,4993,3827],{"class":3435},[3418,4995,5000],{"className":4996,"code":4998,"language":4999},[4997],"language-text","$256,144 million\n","text",[3061,5001,4998],{"__ignoreMap":23},[73,5003,5004],{},"Now your chatbot is now running live! You can ask any questions and get information from your documents instantly.",[140,5006,5008],{"id":5007},"conclusion","Conclusion",[73,5010,5011],{},"This article demonstrated how to implement a Multimodal RAG service using Pathway and Gemini. 
The setup leverages the capabilities of LiteLLM to process and query multimodal data effectively. If you're looking for a cost-effective alternative, consider using the Gemini Mini, which provides great performance at a lower cost.",[73,5013,5014,5015,5017],{},"For more detailed insights and an alternative approach, check out our article on multimodal RAG using GPT-4o ",[77,5016,3147],{"href":3146},". This will give you another perspective on how to handle multimodal RAG applications using different models and techniques.\nBy following the steps outlined above, you can efficiently integrate and utilize various data types to enhance your AI applications, ensuring more accurate and contextually rich outputs.",[5019,5020,5021],"style",{},"html pre.shiki code .s0W1g, html code.shiki .s0W1g{--shiki-default:#BABED8}html pre.shiki code .sAklC, html code.shiki .sAklC{--shiki-default:#89DDFF}html pre.shiki code .sfyAc, html code.shiki .sfyAc{--shiki-default:#C3E88D}html pre.shiki code .sx098, html code.shiki .sx098{--shiki-default:#F78C6C}html pre.shiki code .s-wAU, html code.shiki .s-wAU{--shiki-default:#F07178}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html pre.shiki code .s6cf3, html code.shiki .s6cf3{--shiki-default:#89DDFF;--shiki-default-font-style:italic}html pre.shiki code .saEQR, html code.shiki .saEQR{--shiki-default:#676E95;--shiki-default-font-style:italic}html pre.shiki code .sdLwU, html code.shiki .sdLwU{--shiki-default:#82AAFF}html pre.shiki code .s7ZW3, html code.shiki 
.s7ZW3{--shiki-default:#BABED8;--shiki-default-font-style:italic}",{"title":23,"searchDepth":24,"depth":24,"links":5023},[5024,5025,5032,5040],{"id":3156,"depth":24,"text":3157},{"id":3186,"depth":24,"text":3187,"children":5026},[5027,5028,5029,5030,5031],{"id":3191,"depth":675,"text":3192},{"id":3262,"depth":675,"text":3174},{"id":3316,"depth":675,"text":3177},{"id":3349,"depth":675,"text":3350},{"id":3359,"depth":675,"text":3360},{"id":3398,"depth":24,"text":3399,"children":5033},[5034,5035,5036,5037,5038,5039],{"id":3408,"depth":675,"text":3409},{"id":3461,"depth":675,"text":3462},{"id":3716,"depth":675,"text":3717},{"id":3921,"depth":675,"text":3922},{"id":4156,"depth":675,"text":4157},{"id":4630,"depth":675,"text":4631},{"id":5007,"depth":24,"text":5008},"End-to-end template showing how you can launch a document processing RAG pipeline that utilizes Gemini and Pathway",{"aside":35,"layout":90,"thumbnail":5043,"date":5045,"tags":5046,"keywords":5048,"notebook_export_path":5059,"run_template":5060,"hidden":35},{"src":3101,"fit":5044},"contain","2024-08-06",[5047,3627],"showcase",[5049,5050,5051,5052,3267,5053,5054,5055,5056,5057,5058],"LLM","RAG","GPT","OpenAI","multimodal RAG","MM-RAG","unstructured","notebook","Gemini RAG","RAG Gemini","notebooks\u002Fshowcases\u002Fmultimodal-rag-using-Gemini.ipynb","\u002Fdevelopers\u002Ftemplates\u002Frag\u002Ftemplate-multimodal-rag","\u002Fframework\u002Fblog\u002Fgemini-rag",{"title":3093,"description":5041},{"loc":5061},"framework\u002Fblog\u002F1001.gemini-rag","pgWjERJ2NvHTeunGcu3MKJlhcra6lra69ndyvdPs00M",{"id":5067,"title":5068,"author":5069,"body":5075,"description":23,"extension":27,"meta":6263,"navigation":35,"path":6272,"seo":6273,"sitemap":6274,"stem":6275,"__hash__":6276},"content\u002Fframework\u002Fblog\u002F1002.langchain-integration.md","Langchain and Pathway: RAG Apps with always-up-to-date 
knowledge",{"id":5070,"url":5071,"name":5072,"description":5073,"img":10,"provider":11,"linkedin":5074},"szymon","szymon-dudycz","Szymon Dudycz","Algorithm and Data Processing Magician","https:\u002F\u002Fwww.linkedin.com\u002Fin\u002Fszymon-dudycz-19ab2962\u002F",{"type":13,"value":5076,"toc":6255},[5077,5080,5083,5097,5100,5104,5119,5139,5143,5160,5167,5189,5192,5300,5306,5453,5463,5646,5662,5666,5675,5713,5766,5817,5820,5858,5861,5894,5898,5901,6154,6157,6182,6186,6198,6217,6252],[68,5078,5068],{"id":5079},"langchain-and-pathway-rag-apps-with-always-up-to-date-knowledge",[73,5081,5082],{},"You can now use Pathway in your RAG applications which enables always up-to-date knowledge from your documents to LLMs with Langchaing integration.",[73,5084,5085,5086,5091,5092,694],{},"Pathway is now available on ",[77,5087,5090],{"href":5088,"rel":5089},"https:\u002F\u002Fpython.langchain.com\u002Fdocs\u002Fintegrations\u002Fvectorstores\u002Fpathway\u002F",[81],"Langchain",", a framework for developing applications powered by large language models (LLMs).\nYou can now query Pathway and access up-to-date documents for your RAG applications from LangChain using ",[77,5093,5096],{"href":5094,"rel":5095},"https:\u002F\u002Fapi.python.langchain.com\u002Fen\u002Flatest\u002Fvectorstores\u002Flangchain_community.vectorstores.pathway.PathwayVectorClient.html",[81],"PathwayVectorClient",[73,5098,5099],{},"With this new integration, you will be able to use Pathway Live Data Framework Vector Store natively in LangChain. In this guide, you will have a quick dive into Pathway + LangChain to learn how to create a simple, yet powerful RAG solution.",[140,5101,5103],{"id":5102},"prerequisites","Prerequisites",[73,5105,5106,5107,5110,5111,5114,5115,5118],{},"To work with LangChain you need to install ",[3061,5108,5109],{},"langchain"," package, as it is not a dependence of Pathway. 
In the example in this guide you will also use ",[3061,5112,5113],{},"OpenAIEmbeddings"," class for which you need ",[3061,5116,5117],{},"langchain_openai"," package.",[3418,5120,5122],{"className":3420,"code":5121,"language":3422,"meta":23,"style":23},"!pip install langchain\n!pip install langchain_community\n!pip install langchain_openai\n\n",[3061,5123,5124,5129,5134],{"__ignoreMap":23},[1291,5125,5126],{"class":3427,"line":3428},[1291,5127,5128],{"class":3431},"!pip install langchain\n",[1291,5130,5131],{"class":3427,"line":24},[1291,5132,5133],{"class":3431},"!pip install langchain_community\n",[1291,5135,5136],{"class":3427,"line":675},[1291,5137,5138],{"class":3431},"!pip install langchain_openai\n",[140,5140,5142],{"id":5141},"using-langchain-components-in-pathway-live-data-framework-vector-store","Using LangChain components in Pathway Live Data Framework Vector Store",[73,5144,5145,5146,5152,5153,5159],{},"When using Pathway ",[77,5147,5149],{"href":5148},"\u002Fdevelopers\u002Fapi-docs\u002Fpathway-xpacks-llm\u002Fvectorstore#pathway.xpacks.llm.vector_store.VectorStoreServer",[3061,5150,5151],{},"VectorStoreServer",", you can use LangChain embedder and splitter for processing documents. To do that, use ",[77,5154,5156],{"href":5155},"\u002Fdevelopers\u002Fapi-docs\u002Fpathway-xpacks-llm\u002Fvectorstore#pathway.xpacks.llm.vector_store.VectorStoreServer.from_langchain_components",[3061,5157,5158],{},"from_langchain_components"," class method.",[73,5161,5162,5163,694],{},"To start, you need to create a folder Pathway will listen to. Feel free to skip this if you already have a folder on which you want to build your RAG application. 
You can also use Google Drive, Sharepoint, or any other source from ",[77,5164,5166],{"href":5165},"\u002Fdevelopers\u002Fapi-docs\u002Fpathway-io","pathway-io",[3418,5168,5170],{"className":3420,"code":5169,"language":3422,"meta":23,"style":23},"!mkdir -p 'data\u002F'\n",[3061,5171,5172],{"__ignoreMap":23},[1291,5173,5174,5176,5178,5181,5183,5186],{"class":3427,"line":3428},[1291,5175,3946],{"class":3431},[1291,5177,3949],{"class":3435},[1291,5179,5180],{"class":3431},"p ",[1291,5182,3436],{"class":3435},[1291,5184,5185],{"class":3439},"data\u002F",[1291,5187,5188],{"class":3435},"'\n",[73,5190,5191],{},"To run this example you also need to set OpenAI API key, or change the embedder.",[3418,5193,5195],{"className":3420,"code":5194,"language":3422,"meta":23,"style":23},"import os\nimport getpass\n\n# Set OpenAI API Key\nif \"OPENAI_API_KEY\" in os.environ:\n    api_key = os.environ[\"OPENAI_API_KEY\"]\nelse:\n    api_key = getpass.getpass(\"OpenAI API Key:\")\n",[3061,5196,5197,5203,5210,5214,5219,5244,5268,5275],{"__ignoreMap":23},[1291,5198,5199,5201],{"class":3427,"line":3428},[1291,5200,3476],{"class":3475},[1291,5202,3486],{"class":3431},[1291,5204,5205,5207],{"class":3427,"line":24},[1291,5206,3476],{"class":3475},[1291,5208,5209],{"class":3431}," getpass\n",[1291,5211,5212],{"class":3427,"line":675},[1291,5213,3526],{"emptyLinePlaceholder":35},[1291,5215,5216],{"class":3427,"line":3542},[1291,5217,5218],{"class":3673},"# Set OpenAI API Key\n",[1291,5220,5221,5224,5226,5229,5231,5234,5237,5239,5241],{"class":3427,"line":3547},[1291,5222,5223],{"class":3475},"if",[1291,5225,3705],{"class":3435},[1291,5227,5228],{"class":3439},"OPENAI_API_KEY",[1291,5230,3691],{"class":3435},[1291,5232,5233],{"class":3435}," in",[1291,5235,5236],{"class":3431}," 
os",[1291,5238,694],{"class":3435},[1291,5240,3685],{"class":3457},[1291,5242,5243],{"class":3435},":\n",[1291,5245,5246,5249,5251,5253,5255,5257,5259,5261,5263,5265],{"class":3427,"line":3572},[1291,5247,5248],{"class":3431},"    api_key ",[1291,5250,3738],{"class":3435},[1291,5252,5236],{"class":3431},[1291,5254,694],{"class":3435},[1291,5256,3685],{"class":3457},[1291,5258,3688],{"class":3435},[1291,5260,3691],{"class":3435},[1291,5262,5228],{"class":3439},[1291,5264,3691],{"class":3435},[1291,5266,5267],{"class":3435},"]\n",[1291,5269,5270,5273],{"class":3427,"line":3614},[1291,5271,5272],{"class":3475},"else",[1291,5274,5243],{"class":3435},[1291,5276,5277,5279,5281,5284,5286,5289,5291,5293,5296,5298],{"class":3427,"line":3640},[1291,5278,5248],{"class":3431},[1291,5280,3738],{"class":3435},[1291,5282,5283],{"class":3431}," getpass",[1291,5285,694],{"class":3435},[1291,5287,5288],{"class":3812},"getpass",[1291,5290,3816],{"class":3435},[1291,5292,3691],{"class":3435},[1291,5294,5295],{"class":3439},"OpenAI API Key:",[1291,5297,3691],{"class":3435},[1291,5299,3827],{"class":3435},[73,5301,5302,5303,5305],{},"To run the server use Pathway filesystem connector to read files from the ",[3061,5304,3935],{}," folder.",[3418,5307,5309],{"className":3420,"code":5308,"language":3422,"meta":23,"style":23},"import pathway as pw\n\nfrom pathway.xpacks.llm.vector_store import VectorStoreServer\nfrom langchain_openai import OpenAIEmbeddings\nfrom langchain.text_splitter import CharacterTextSplitter\n\ndata = pw.io.fs.read(\n    \".\u002Fdata\",\n    format=\"binary\",\n    mode=\"streaming\",\n    
with_metadata=True,\n)\n",[3061,5310,5311,5321,5325,5347,5359,5376,5380,5403,5413,5427,5443,5449],{"__ignoreMap":23},[1291,5312,5313,5315,5317,5319],{"class":3427,"line":3428},[1291,5314,3476],{"class":3475},[1291,5316,3533],{"class":3431},[1291,5318,3536],{"class":3475},[1291,5320,3539],{"class":3431},[1291,5322,5323],{"class":3427,"line":24},[1291,5324,3526],{"emptyLinePlaceholder":35},[1291,5326,5327,5329,5331,5333,5335,5337,5339,5341,5343,5345],{"class":3427,"line":675},[1291,5328,3550],{"class":3475},[1291,5330,3553],{"class":3431},[1291,5332,694],{"class":3435},[1291,5334,3581],{"class":3431},[1291,5336,694],{"class":3435},[1291,5338,3627],{"class":3431},[1291,5340,694],{"class":3435},[1291,5342,3657],{"class":3431},[1291,5344,3476],{"class":3475},[1291,5346,3662],{"class":3431},[1291,5348,5349,5351,5354,5356],{"class":3427,"line":3542},[1291,5350,3550],{"class":3475},[1291,5352,5353],{"class":3431}," langchain_openai ",[1291,5355,3476],{"class":3475},[1291,5357,5358],{"class":3431}," OpenAIEmbeddings\n",[1291,5360,5361,5363,5366,5368,5371,5373],{"class":3427,"line":3547},[1291,5362,3550],{"class":3475},[1291,5364,5365],{"class":3431}," langchain",[1291,5367,694],{"class":3435},[1291,5369,5370],{"class":3431},"text_splitter ",[1291,5372,3476],{"class":3475},[1291,5374,5375],{"class":3431}," CharacterTextSplitter\n",[1291,5377,5378],{"class":3427,"line":3572},[1291,5379,3526],{"emptyLinePlaceholder":35},[1291,5381,5382,5385,5387,5389,5391,5393,5395,5397,5399,5401],{"class":3427,"line":3614},[1291,5383,5384],{"class":3431},"data 
",[1291,5386,3738],{"class":3435},[1291,5388,4073],{"class":3431},[1291,5390,694],{"class":3435},[1291,5392,4078],{"class":3457},[1291,5394,694],{"class":3435},[1291,5396,4083],{"class":3457},[1291,5398,694],{"class":3435},[1291,5400,4088],{"class":3812},[1291,5402,3874],{"class":3435},[1291,5404,5405,5407,5409,5411],{"class":3427,"line":3640},[1291,5406,4382],{"class":3435},[1291,5408,3928],{"class":3439},[1291,5410,3691],{"class":3435},[1291,5412,4107],{"class":3435},[1291,5414,5415,5417,5419,5421,5423,5425],{"class":3427,"line":3665},[1291,5416,4112],{"class":3819},[1291,5418,3738],{"class":3435},[1291,5420,3691],{"class":3435},[1291,5422,4119],{"class":3439},[1291,5424,3691],{"class":3435},[1291,5426,4107],{"class":3435},[1291,5428,5429,5432,5434,5436,5439,5441],{"class":3427,"line":3670},[1291,5430,5431],{"class":3819},"    mode",[1291,5433,3738],{"class":3435},[1291,5435,3691],{"class":3435},[1291,5437,5438],{"class":3439},"streaming",[1291,5440,3691],{"class":3435},[1291,5442,4107],{"class":3435},[1291,5444,5445,5447],{"class":3427,"line":3677},[1291,5446,4128],{"class":3819},[1291,5448,4131],{"class":3435},[1291,5450,5451],{"class":3427,"line":3877},[1291,5452,3827],{"class":3435},[73,5454,5455,5456,5459,5460,5462],{},"And then pass them to the server, which will split them using ",[3061,5457,5458],{},"CharacterTextSplitter"," and embed them using ",[3061,5461,5113],{},", both from LangChain.",[3418,5464,5466],{"className":3420,"code":5465,"language":3422,"meta":23,"style":23},"embeddings = OpenAIEmbeddings(api_key=api_key)\nsplitter = CharacterTextSplitter()\n\nhost = \"127.0.0.1\"\nport = 8666\n\nserver = VectorStoreServer.from_langchain_components(\n    data, embedder=embeddings, splitter=splitter\n)\nserver.run_server(host, port=port, with_cache=True, cache_backend=pw.persistence.Backend.filesystem(\".\u002FCache\"), 
threaded=True)\n",[3061,5467,5468,5488,5500,5504,5518,5528,5532,5547,5572,5576],{"__ignoreMap":23},[1291,5469,5470,5473,5475,5478,5480,5482,5484,5486],{"class":3427,"line":3428},[1291,5471,5472],{"class":3431},"embeddings ",[1291,5474,3738],{"class":3435},[1291,5476,5477],{"class":3812}," OpenAIEmbeddings",[1291,5479,3816],{"class":3435},[1291,5481,3820],{"class":3819},[1291,5483,3738],{"class":3435},[1291,5485,3820],{"class":3812},[1291,5487,3827],{"class":3435},[1291,5489,5490,5493,5495,5498],{"class":3427,"line":24},[1291,5491,5492],{"class":3431},"splitter ",[1291,5494,3738],{"class":3435},[1291,5496,5497],{"class":3812}," CharacterTextSplitter",[1291,5499,4871],{"class":3435},[1291,5501,5502],{"class":3427,"line":675},[1291,5503,3526],{"emptyLinePlaceholder":35},[1291,5505,5506,5509,5511,5513,5516],{"class":3427,"line":3542},[1291,5507,5508],{"class":3431},"host ",[1291,5510,3738],{"class":3435},[1291,5512,3705],{"class":3435},[1291,5514,5515],{"class":3439},"127.0.0.1",[1291,5517,3746],{"class":3435},[1291,5519,5520,5523,5525],{"class":3427,"line":3547},[1291,5521,5522],{"class":3431},"port ",[1291,5524,3738],{"class":3435},[1291,5526,5527],{"class":3451}," 8666\n",[1291,5529,5530],{"class":3427,"line":3572},[1291,5531,3526],{"emptyLinePlaceholder":35},[1291,5533,5534,5537,5539,5541,5543,5545],{"class":3427,"line":3614},[1291,5535,5536],{"class":3431},"server ",[1291,5538,3738],{"class":3435},[1291,5540,4575],{"class":3431},[1291,5542,694],{"class":3435},[1291,5544,5158],{"class":3812},[1291,5546,3874],{"class":3435},[1291,5548,5549,5552,5554,5557,5559,5562,5564,5567,5569],{"class":3427,"line":3640},[1291,5550,5551],{"class":3812},"    data",[1291,5553,3566],{"class":3435},[1291,5555,5556],{"class":3819}," embedder",[1291,5558,3738],{"class":3435},[1291,5560,5561],{"class":3812},"embeddings",[1291,5563,3566],{"class":3435},[1291,5565,5566],{"class":3819}," 
splitter",[1291,5568,3738],{"class":3435},[1291,5570,5571],{"class":3812},"splitter\n",[1291,5573,5574],{"class":3427,"line":3665},[1291,5575,3827],{"class":3435},[1291,5577,5578,5581,5583,5585,5587,5589,5591,5593,5595,5598,5600,5603,5606,5609,5611,5613,5615,5618,5620,5623,5625,5628,5630,5632,5635,5637,5640,5643],{"class":3427,"line":3670},[1291,5579,5580],{"class":3431},"server",[1291,5582,694],{"class":3435},[1291,5584,4822],{"class":3812},[1291,5586,3816],{"class":3435},[1291,5588,4764],{"class":3812},[1291,5590,3566],{"class":3435},[1291,5592,4774],{"class":3819},[1291,5594,3738],{"class":3435},[1291,5596,5597],{"class":3812},"port",[1291,5599,3566],{"class":3435},[1291,5601,5602],{"class":3819}," with_cache",[1291,5604,5605],{"class":3435},"=True,",[1291,5607,5608],{"class":3819}," cache_backend",[1291,5610,3738],{"class":3435},[1291,5612,3841],{"class":3812},[1291,5614,694],{"class":3435},[1291,5616,5617],{"class":3457},"persistence",[1291,5619,694],{"class":3435},[1291,5621,5622],{"class":3457},"Backend",[1291,5624,694],{"class":3435},[1291,5626,5627],{"class":3812},"filesystem",[1291,5629,3816],{"class":3435},[1291,5631,3691],{"class":3435},[1291,5633,5634],{"class":3439},".\u002FCache",[1291,5636,3691],{"class":3435},[1291,5638,5639],{"class":3435},"),",[1291,5641,5642],{"class":3819}," threaded",[1291,5644,5645],{"class":3435},"=True)\n",[73,5647,5648,5649,5654,5655,5657,5658,5661],{},"The server is now running and ready for querying with a ",[77,5650,5652],{"href":5651},"\u002Fdevelopers\u002Fapi-docs\u002Fpathway-xpacks-llm\u002Fvectorstore#pathway.xpacks.llm.vector_store.VectorStoreClient",[3061,5653,5151],{}," or with a ",[3061,5656,5096],{}," from ",[3061,5659,5660],{},"langchain-community"," described in the next Section.",[140,5663,5665],{"id":5664},"using-pathway-as-a-vector-store-in-langchain-pipelines","Using Pathway as a Vector Store in LangChain pipelines",[73,5667,5668,5669,5671,5672,694],{},"Once you have a ",[3061,5670,5151],{}," running 
you can access it from LangChain pipeline by using ",[77,5673,5096],{"href":5094,"rel":5674},[81],[73,5676,5677,5678,5681,5682,3126,5684,5686,5687,5689,5690,5692,5693,5698,5699,5702,5703,3126,5708,694],{},"To do that you need to provide either the ",[3061,5679,5680],{},"url"," or ",[3061,5683,4764],{},[3061,5685,5597],{}," of the running ",[3061,5688,5151],{},". In the code example below, you will connect to the ",[3061,5691,5151],{}," defined in the previous Section, so make sure it's running before making queries. Alternatively, you can also use a publicly available ",[77,5694,5697],{"href":5695,"rel":5696},"https:\u002F\u002Fpathway.com\u002Fsolutions",[81],"demo pipeline"," to test your client. Its REST API you can access at ",[3061,5700,5701],{},"https:\u002F\u002Fdemo-document-indexing.pathway.stream",". This demo ingests documents from ",[77,5704,5707],{"href":5705,"rel":5706},"https:\u002F\u002Fdrive.google.com\u002Fdrive\u002Fu\u002F0\u002Ffolders\u002F1cULDv2OaViJBmOfG5WB0oWcgayNrGtVs",[81],"Google Drive",[77,5709,5712],{"href":5710,"rel":5711},"https:\u002F\u002Fnavalgo.sharepoint.com\u002Fsites\u002FConnectorSandbox\u002FShared%20Documents\u002FForms\u002FAllItems.aspx?id=%2Fsites%2FConnectorSandbox%2FShared%20Documents%2FIndexerSandbox&p=true&ga=1",[81],"Sharepoint",[3418,5714,5716],{"className":3420,"code":5715,"language":3422,"meta":23,"style":23},"from langchain_community.vectorstores import PathwayVectorClient\n\nclient = PathwayVectorClient(host=host, port=port)\n",[3061,5717,5718,5735,5739],{"__ignoreMap":23},[1291,5719,5720,5722,5725,5727,5730,5732],{"class":3427,"line":3428},[1291,5721,3550],{"class":3475},[1291,5723,5724],{"class":3431}," langchain_community",[1291,5726,694],{"class":3435},[1291,5728,5729],{"class":3431},"vectorstores ",[1291,5731,3476],{"class":3475},[1291,5733,5734],{"class":3431}," 
PathwayVectorClient\n",[1291,5736,5737],{"class":3427,"line":24},[1291,5738,3526],{"emptyLinePlaceholder":35},[1291,5740,5741,5743,5745,5748,5750,5752,5754,5756,5758,5760,5762,5764],{"class":3427,"line":675},[1291,5742,4913],{"class":3431},[1291,5744,3738],{"class":3435},[1291,5746,5747],{"class":3812}," PathwayVectorClient",[1291,5749,3816],{"class":3435},[1291,5751,4764],{"class":3819},[1291,5753,3738],{"class":3435},[1291,5755,4764],{"class":3812},[1291,5757,3566],{"class":3435},[1291,5759,4774],{"class":3819},[1291,5761,3738],{"class":3435},[1291,5763,5597],{"class":3812},[1291,5765,3827],{"class":3435},[3418,5767,5769],{"className":3420,"code":5768,"language":3422,"meta":23,"style":23},"query = \"What is Pathway?\"\ndocs = client.similarity_search(query)\nprint(docs)\n",[3061,5770,5771,5785,5806],{"__ignoreMap":23},[1291,5772,5773,5776,5778,5780,5783],{"class":3427,"line":3428},[1291,5774,5775],{"class":3431},"query ",[1291,5777,3738],{"class":3435},[1291,5779,3705],{"class":3435},[1291,5781,5782],{"class":3439},"What is Pathway?",[1291,5784,3746],{"class":3435},[1291,5786,5787,5790,5792,5794,5796,5799,5801,5804],{"class":3427,"line":24},[1291,5788,5789],{"class":3431},"docs ",[1291,5791,3738],{"class":3435},[1291,5793,4965],{"class":3431},[1291,5795,694],{"class":3435},[1291,5797,5798],{"class":3812},"similarity_search",[1291,5800,3816],{"class":3435},[1291,5802,5803],{"class":3812},"query",[1291,5805,3827],{"class":3435},[1291,5807,5808,5810,5812,5815],{"class":3427,"line":675},[1291,5809,4986],{"class":3812},[1291,5811,3816],{"class":3435},[1291,5813,5814],{"class":3812},"docs",[1291,5816,3827],{"class":3435},[73,5818,5819],{},"As you can see, the LLM cannot respond clearly as it lacks current knowledge, but this is where Pathway shines. 
Add new data to the folder Pathway is listening to, then ask our agent again to see how it responds.\nTo do that, you can download the repo readme of Pathway into our data folder:",[3418,5821,5823],{"className":3420,"code":5822,"language":3422,"meta":23,"style":23},"!wget 'https:\u002F\u002Fraw.githubusercontent.com\u002Fpathwaycom\u002Fpathway\u002Fmain\u002FREADME.md' -O 'data\u002Fpathway_readme.md' -q -nc\n",[3061,5824,5825],{"__ignoreMap":23},[1291,5826,5827,5829,5831,5834,5836,5839,5842,5844,5847,5849,5851,5853,5855],{"class":3427,"line":3428},[1291,5828,3967],{"class":3431},[1291,5830,3436],{"class":3435},[1291,5832,5833],{"class":3439},"https:\u002F\u002Fraw.githubusercontent.com\u002Fpathwaycom\u002Fpathway\u002Fmain\u002FREADME.md",[1291,5835,3436],{"class":3435},[1291,5837,5838],{"class":3435}," -",[1291,5840,5841],{"class":3431},"O ",[1291,5843,3436],{"class":3435},[1291,5845,5846],{"class":3439},"data\u002Fpathway_readme.md",[1291,5848,3436],{"class":3435},[1291,5850,5838],{"class":3435},[1291,5852,3972],{"class":3431},[1291,5854,3949],{"class":3435},[1291,5856,5857],{"class":3431},"nc\n",[73,5859,5860],{},"Try again to query with the new data:",[3418,5862,5864],{"className":3420,"code":5863,"language":3422,"meta":23,"style":23},"docs = client.similarity_search(query)\nprint(docs)\n",[3061,5865,5866,5884],{"__ignoreMap":23},[1291,5867,5868,5870,5872,5874,5876,5878,5880,5882],{"class":3427,"line":3428},[1291,5869,5789],{"class":3431},[1291,5871,3738],{"class":3435},[1291,5873,4965],{"class":3431},[1291,5875,694],{"class":3435},[1291,5877,5798],{"class":3812},[1291,5879,3816],{"class":3435},[1291,5881,5803],{"class":3812},[1291,5883,3827],{"class":3435},[1291,5885,5886,5888,5890,5892],{"class":3427,"line":24},[1291,5887,4986],{"class":3812},[1291,5889,3816],{"class":3435},[1291,5891,5814],{"class":3812},[1291,5893,3827],{"class":3435},[3189,5895,5897],{"id":5896},"rag-pipeline-in-langchain","RAG pipeline in LangChain",[73,5899,5900],{},"The next step is 
to write a chain in LangChain. The next example implements a simple RAG, that given a question, retrieves documents from Pathway Live Data Framework Vector Store. These are then used as a context for the given question in a prompt sent to the OpenAI chat.",[3418,5902,5904],{"className":3420,"code":5903,"language":3422,"meta":23,"style":23},"from langchain_core.output_parsers import StrOutputParser\nfrom langchain_core.prompts import ChatPromptTemplate\nfrom langchain_core.runnables import RunnablePassthrough\nfrom langchain_openai import ChatOpenAI\n\nretriever = client.as_retriever()\n\ntemplate = \"\"\"\nYou are smart assistant that helps users with their documents on Google Drive and Sharepoint.\nGiven a context, respond to the user question.\nCONTEXT:\n{context}\nQUESTION: {question}\nYOUR ANSWER:\"\"\"\n\nprompt = ChatPromptTemplate.from_template(template)\nllm = ChatOpenAI()\nchain = (\n    {\"context\": retriever, \"question\": RunnablePassthrough()}\n    | prompt\n    | llm\n    | StrOutputParser()\n)\n",[3061,5905,5906,5923,5939,5955,5966,5970,5986,5990,6000,6005,6010,6015,6020,6028,6036,6041,6064,6076,6087,6122,6131,6139,6149],{"__ignoreMap":23},[1291,5907,5908,5910,5913,5915,5918,5920],{"class":3427,"line":3428},[1291,5909,3550],{"class":3475},[1291,5911,5912],{"class":3431}," langchain_core",[1291,5914,694],{"class":3435},[1291,5916,5917],{"class":3431},"output_parsers ",[1291,5919,3476],{"class":3475},[1291,5921,5922],{"class":3431}," StrOutputParser\n",[1291,5924,5925,5927,5929,5931,5934,5936],{"class":3427,"line":24},[1291,5926,3550],{"class":3475},[1291,5928,5912],{"class":3431},[1291,5930,694],{"class":3435},[1291,5932,5933],{"class":3431},"prompts ",[1291,5935,3476],{"class":3475},[1291,5937,5938],{"class":3431}," ChatPromptTemplate\n",[1291,5940,5941,5943,5945,5947,5950,5952],{"class":3427,"line":675},[1291,5942,3550],{"class":3475},[1291,5944,5912],{"class":3431},[1291,5946,694],{"class":3435},[1291,5948,5949],{"class":3431},"runnables 
",[1291,5951,3476],{"class":3475},[1291,5953,5954],{"class":3431}," RunnablePassthrough\n",[1291,5956,5957,5959,5961,5963],{"class":3427,"line":3542},[1291,5958,3550],{"class":3475},[1291,5960,5353],{"class":3431},[1291,5962,3476],{"class":3475},[1291,5964,5965],{"class":3431}," ChatOpenAI\n",[1291,5967,5968],{"class":3427,"line":3547},[1291,5969,3526],{"emptyLinePlaceholder":35},[1291,5971,5972,5975,5977,5979,5981,5984],{"class":3427,"line":3572},[1291,5973,5974],{"class":3431},"retriever ",[1291,5976,3738],{"class":3435},[1291,5978,4965],{"class":3431},[1291,5980,694],{"class":3435},[1291,5982,5983],{"class":3812},"as_retriever",[1291,5985,4871],{"class":3435},[1291,5987,5988],{"class":3427,"line":3614},[1291,5989,3526],{"emptyLinePlaceholder":35},[1291,5991,5992,5995,5997],{"class":3427,"line":3640},[1291,5993,5994],{"class":3431},"template ",[1291,5996,3738],{"class":3435},[1291,5998,5999],{"class":3435}," \"\"\"\n",[1291,6001,6002],{"class":3427,"line":3665},[1291,6003,6004],{"class":3439},"You are smart assistant that helps users with their documents on Google Drive and Sharepoint.\n",[1291,6006,6007],{"class":3427,"line":3670},[1291,6008,6009],{"class":3439},"Given a context, respond to the user question.\n",[1291,6011,6012],{"class":3427,"line":3677},[1291,6013,6014],{"class":3439},"CONTEXT:\n",[1291,6016,6017],{"class":3427,"line":3877},[1291,6018,6019],{"class":3451},"{context}\n",[1291,6021,6022,6025],{"class":3427,"line":3916},[1291,6023,6024],{"class":3439},"QUESTION: ",[1291,6026,6027],{"class":3451},"{question}\n",[1291,6029,6030,6033],{"class":3427,"line":4519},[1291,6031,6032],{"class":3439},"YOUR ANSWER:",[1291,6034,6035],{"class":3435},"\"\"\"\n",[1291,6037,6039],{"class":3427,"line":6038},15,[1291,6040,3526],{"emptyLinePlaceholder":35},[1291,6042,6044,6047,6049,6052,6054,6057,6059,6062],{"class":3427,"line":6043},16,[1291,6045,6046],{"class":3431},"prompt ",[1291,6048,3738],{"class":3435},[1291,6050,6051],{"class":3431}," 
ChatPromptTemplate",[1291,6053,694],{"class":3435},[1291,6055,6056],{"class":3812},"from_template",[1291,6058,3816],{"class":3435},[1291,6060,6061],{"class":3812},"template",[1291,6063,3827],{"class":3435},[1291,6065,6067,6069,6071,6074],{"class":3427,"line":6066},17,[1291,6068,3586],{"class":3431},[1291,6070,3738],{"class":3435},[1291,6072,6073],{"class":3812}," ChatOpenAI",[1291,6075,4871],{"class":3435},[1291,6077,6079,6082,6084],{"class":3427,"line":6078},18,[1291,6080,6081],{"class":3431},"chain ",[1291,6083,3738],{"class":3435},[1291,6085,6086],{"class":3435}," (\n",[1291,6088,6090,6093,6095,6098,6100,6102,6105,6107,6109,6112,6114,6116,6119],{"class":3427,"line":6089},19,[1291,6091,6092],{"class":3435},"    {",[1291,6094,3691],{"class":3435},[1291,6096,6097],{"class":3439},"context",[1291,6099,3691],{"class":3435},[1291,6101,4390],{"class":3435},[1291,6103,6104],{"class":3431}," retriever",[1291,6106,3566],{"class":3435},[1291,6108,3705],{"class":3435},[1291,6110,6111],{"class":3439},"question",[1291,6113,3691],{"class":3435},[1291,6115,4390],{"class":3435},[1291,6117,6118],{"class":3812}," RunnablePassthrough",[1291,6120,6121],{"class":3435},"()}\n",[1291,6123,6125,6128],{"class":3427,"line":6124},20,[1291,6126,6127],{"class":3435},"    |",[1291,6129,6130],{"class":3431}," prompt\n",[1291,6132,6134,6136],{"class":3427,"line":6133},21,[1291,6135,6127],{"class":3435},[1291,6137,6138],{"class":3431}," llm\n",[1291,6140,6142,6144,6147],{"class":3427,"line":6141},22,[1291,6143,6127],{"class":3435},[1291,6145,6146],{"class":3812}," StrOutputParser",[1291,6148,4871],{"class":3435},[1291,6150,6152],{"class":3427,"line":6151},23,[1291,6153,3827],{"class":3435},[73,6155,6156],{},"Now you have a RAG chain written in LangChain that uses Pathway as its Vector Store. 
Test it by asking some question.",[3418,6158,6160],{"className":3420,"code":6159,"language":3422,"meta":23,"style":23},"chain.invoke(\"What is Pathway?\")\n",[3061,6161,6162],{"__ignoreMap":23},[1291,6163,6164,6167,6169,6172,6174,6176,6178,6180],{"class":3427,"line":3428},[1291,6165,6166],{"class":3431},"chain",[1291,6168,694],{"class":3435},[1291,6170,6171],{"class":3812},"invoke",[1291,6173,3816],{"class":3435},[1291,6175,3691],{"class":3435},[1291,6177,5782],{"class":3439},[1291,6179,3691],{"class":3435},[1291,6181,3827],{"class":3435},[3189,6183,6185],{"id":6184},"vector-store-statistics","Vector Store statistics",[73,6187,6188,6189,6194,6195,6197],{},"Just like ",[77,6190,6191],{"href":5651},[3061,6192,6193],{},"VectorStoreClient"," from the Pathway Live Data Framework LLM xpack, ",[3061,6196,5096],{}," gives you two methods for getting information about indexed documents.",[73,6199,6200,6201,6208,6209,6216],{},"The first one is ",[77,6202,6205],{"href":6203,"rel":6204},"https:\u002F\u002Fapi.python.langchain.com\u002Fen\u002Flatest\u002Fvectorstores\u002Flangchain_community.vectorstores.pathway.PathwayVectorClient.html#langchain_community.vectorstores.pathway.PathwayVectorClient.get_vectorstore_statistics",[81],[3061,6206,6207],{},"get_vectorstore_statistics"," and gives essential statistics on the state of the vector store, like the number of indexed files and the timestamp of the last updated one. 
The second one is ",[77,6210,6213],{"href":6211,"rel":6212},"https:\u002F\u002Fapi.python.langchain.com\u002Fen\u002Flatest\u002Fvectorstores\u002Flangchain_community.vectorstores.pathway.PathwayVectorClient.html#langchain_community.vectorstores.pathway.PathwayVectorClient.get_input_files",[81],[3061,6214,6215],{},"get_input_files",", which gets the list of indexed files along with the associated metadata.",[3418,6218,6220],{"className":3420,"code":6219,"language":3422,"meta":23,"style":23},"print(client.get_vectorstore_statistics())\nprint(client.get_input_files())\n",[3061,6221,6222,6238],{"__ignoreMap":23},[1291,6223,6224,6226,6228,6231,6233,6235],{"class":3427,"line":3428},[1291,6225,4986],{"class":3812},[1291,6227,3816],{"class":3435},[1291,6229,6230],{"class":3812},"client",[1291,6232,694],{"class":3435},[1291,6234,6207],{"class":3812},[1291,6236,6237],{"class":3435},"())\n",[1291,6239,6240,6242,6244,6246,6248,6250],{"class":3427,"line":24},[1291,6241,4986],{"class":3812},[1291,6243,3816],{"class":3435},[1291,6245,6230],{"class":3812},[1291,6247,694],{"class":3435},[1291,6249,6215],{"class":3812},[1291,6251,6237],{"class":3435},[5019,6253,6254],{},"html pre.shiki code .s0W1g, html code.shiki .s0W1g{--shiki-default:#BABED8}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html pre.shiki code .sAklC, html code.shiki .sAklC{--shiki-default:#89DDFF}html pre.shiki code .sfyAc, html code.shiki .sfyAc{--shiki-default:#C3E88D}html pre.shiki code .s6cf3, html code.shiki .s6cf3{--shiki-default:#89DDFF;--shiki-default-font-style:italic}html pre.shiki code .saEQR, 
html code.shiki .saEQR{--shiki-default:#676E95;--shiki-default-font-style:italic}html pre.shiki code .s-wAU, html code.shiki .s-wAU{--shiki-default:#F07178}html pre.shiki code .sdLwU, html code.shiki .sdLwU{--shiki-default:#82AAFF}html pre.shiki code .s7ZW3, html code.shiki .s7ZW3{--shiki-default:#BABED8;--shiki-default-font-style:italic}html pre.shiki code .sx098, html code.shiki .sx098{--shiki-default:#F78C6C}",{"title":23,"searchDepth":24,"depth":24,"links":6256},[6257,6258,6259],{"id":5102,"depth":24,"text":5103},{"id":5141,"depth":24,"text":5142},{"id":5664,"depth":24,"text":5665,"children":6260},[6261,6262],{"id":5896,"depth":675,"text":5897},{"id":6184,"depth":675,"text":6185},{"layout":90,"date":6264,"hidden":35,"thumbnail":6265,"tags":6267,"notebook_export_path":6269,"keywords":6270},"2024-05-18",{"src":6266},"\u002Fassets\u002Fcontent\u002Fshowcases\u002Fvectorstore\u002FLangchain-Pathway.png",[5047,3627,6268],"engineering","notebooks\u002Fshowcases\u002Flangchain-integration.ipynb",[5049,5050,5051,5052,6271,5056],"LangChain","\u002Fframework\u002Fblog\u002Flangchain-integration",{"title":5068,"description":23},{"loc":6272},"framework\u002Fblog\u002F1002.langchain-integration","oqfvxhdOkZj2Pp9I1_3UcId4QHpdaQFTmfqtQMRz3kA",{"id":6278,"title":6279,"author":6280,"body":6281,"description":7324,"extension":27,"meta":7325,"navigation":35,"path":7333,"seo":7334,"sitemap":7335,"stem":7336,"__hash__":7337},"content\u002Fframework\u002Fblog\u002F1003.llamaindex-pathway.md","LlamaIndex and Pathway: RAG Apps with always-up-to-date knowledge",{"id":7,"url":8,"name":9,"img":10,"provider":11},{"type":13,"value":6282,"toc":7311},[6283,6286,6289,6308,6311,6315,6318,6322,6325,6328,6339,6341,6345,6391,6395,6400,6420,6424,6510,6514,6517,6523,6526,6651,6655,6662,6672,6677,6987,6990,6994,7179,7185,7188,7194,7219,7221,7262,7268,7271,7273,7276,7293,7308],[68,6284,6279],{"id":6285},"llamaindex-and-pathway-rag-apps-with-always-up-to-date-knowledge",[73,6287,6288],{},"You can now 
use Pathway Live Data Framework in your RAG applications which enables always up-to-date knowledge from your documents to LLMs with LlamaIndex integration.",[73,6290,6291,6292,6297,6298,3126,6303,694],{},"The Pathway Live Data Framework is now available on ",[77,6293,6296],{"href":6294,"rel":6295},"https:\u002F\u002Fdocs.llamaindex.ai\u002Fen\u002Fstable\u002F",[81],"LlamaIndex",", a data framework for LLM-based applications to ingest, structure, and access private or domain-specific data.\nYou can now query the framework and access up-to-date documents for your RAG applications from LlamaIndex using Pathway ",[77,6299,6302],{"href":6300,"rel":6301},"https:\u002F\u002Fdocs.llamaindex.ai\u002Fen\u002Fstable\u002Fexamples\u002Fdata_connectors\u002FPathwayReaderDemo.html#pathway-reader",[81],"Reader",[77,6304,6307],{"href":6305,"rel":6306},"https:\u002F\u002Fdocs.llamaindex.ai\u002Fen\u002Fstable\u002Fexamples\u002Fretrievers\u002Fpathway_retriever.html#pathway-retriever",[81],"Retriever",[73,6309,6310],{},"With this new integration, you will be able to use the framework's vector store natively in LlamaIndex, which opens up endless new possibilities!\nIn this article, you will have a quick dive into Pathway Live Data Framework + LlamaIndex to explore how to create a simple, yet powerful RAG solution using PathwayRetriever.",[140,6312,6314],{"id":6313},"why-pathway-live-data-framework","Why Pathway Live Data Framework?",[73,6316,6317],{},"The Pathway Live Data Framework offers an indexing solution that is always up to date without the need for traditional ETL pipelines, which are needed in regular VectorDBs. 
It can monitor several data sources (files, S3 folders, cloud storage) and provide the latest information to your LLM application.",[140,6319,6321],{"id":6320},"learning-outcomes","Learning outcomes",[73,6323,6324],{},"You will learn how to create a simple RAG solution using Pathway Live Data Framework and LlamaIndex.",[73,6326,6327],{},"This article consists of:",[145,6329,6330,6333,6336],{},[148,6331,6332],{},"Create data sources. Define data sources the framework will read and keep the vector store updated.",[148,6334,6335],{},"Creating a transformation pipeline (parsing, splitting, embedding) for loading documents into Vector store",[148,6337,6338],{},"Querying your data and getting answers from LlamaIndex.",[140,6340,5103],{"id":5102},[3189,6342,6344],{"id":6343},"installing-pathway-live-data-framework-and-llamaindex","Installing Pathway Live Data Framework and LlamaIndex.",[3418,6346,6350],{"className":6347,"code":6348,"language":6349,"meta":23,"style":23},"language-bash shiki shiki-themes material-theme-palenight","pip install pathway\npip install llama-index\npip install llama-index-retrievers-pathway\npip install llama-index-embeddings-openai\n","bash",[3061,6351,6352,6364,6373,6382],{"__ignoreMap":23},[1291,6353,6354,6358,6361],{"class":3427,"line":3428},[1291,6355,6357],{"class":6356},"s5Dmg","pip",[1291,6359,6360],{"class":3439}," install",[1291,6362,6363],{"class":3439}," pathway\n",[1291,6365,6366,6368,6370],{"class":3427,"line":24},[1291,6367,6357],{"class":6356},[1291,6369,6360],{"class":3439},[1291,6371,6372],{"class":3439}," llama-index\n",[1291,6374,6375,6377,6379],{"class":3427,"line":675},[1291,6376,6357],{"class":6356},[1291,6378,6360],{"class":3439},[1291,6380,6381],{"class":3439}," llama-index-retrievers-pathway\n",[1291,6383,6384,6386,6388],{"class":3427,"line":3542},[1291,6385,6357],{"class":6356},[1291,6387,6360],{"class":3439},[1291,6389,6390],{"class":3439}," 
llama-index-embeddings-openai\n",[3189,6392,6394],{"id":6393},"setting-up-a-folder","Setting up a folder",[73,6396,6397,6398,694],{},"To start, you need to create a folder the framework will listen to. Feel free to skip this if you already have a folder on which you want to build your RAG application. You can also use Google Drive, Sharepoint, or any other source from ",[77,6399,5166],{"href":5165},[3418,6401,6403],{"className":6347,"code":6402,"language":6349,"meta":23,"style":23},"mkdir -p 'data\u002F'\n",[3061,6404,6405],{"__ignoreMap":23},[1291,6406,6407,6410,6413,6416,6418],{"class":3427,"line":3428},[1291,6408,6409],{"class":6356},"mkdir",[1291,6411,6412],{"class":3439}," -p",[1291,6414,6415],{"class":3435}," '",[1291,6417,5185],{"class":3439},[1291,6419,5188],{"class":3435},[3189,6421,6423],{"id":6422},"set-up-openai-api-key","Set up OpenAI API Key",[3418,6425,6427],{"className":3420,"code":6426,"language":3422,"meta":23,"style":23},"import getpass\nimport os\n\n# omit if embedder of choice is not OpenAI\nif \"OPENAI_API_KEY\" not in os.environ:\n    os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",[3061,6428,6429,6435,6441,6445,6450,6473],{"__ignoreMap":23},[1291,6430,6431,6433],{"class":3427,"line":3428},[1291,6432,3476],{"class":3475},[1291,6434,5209],{"class":3431},[1291,6436,6437,6439],{"class":3427,"line":24},[1291,6438,3476],{"class":3475},[1291,6440,3486],{"class":3431},[1291,6442,6443],{"class":3427,"line":675},[1291,6444,3526],{"emptyLinePlaceholder":35},[1291,6446,6447],{"class":3427,"line":3542},[1291,6448,6449],{"class":3673},"# omit if embedder of choice is not OpenAI\n",[1291,6451,6452,6454,6456,6458,6460,6463,6465,6467,6469,6471],{"class":3427,"line":3547},[1291,6453,5223],{"class":3475},[1291,6455,3705],{"class":3435},[1291,6457,5228],{"class":3439},[1291,6459,3691],{"class":3435},[1291,6461,6462],{"class":3435}," 
not",[1291,6464,5233],{"class":3435},[1291,6466,5236],{"class":3431},[1291,6468,694],{"class":3435},[1291,6470,3685],{"class":3457},[1291,6472,5243],{"class":3435},[1291,6474,6475,6478,6480,6482,6484,6486,6488,6490,6492,6494,6496,6498,6500,6502,6504,6506,6508],{"class":3427,"line":3572},[1291,6476,6477],{"class":3431},"    os",[1291,6479,694],{"class":3435},[1291,6481,3685],{"class":3457},[1291,6483,3688],{"class":3435},[1291,6485,3691],{"class":3435},[1291,6487,5228],{"class":3439},[1291,6489,3691],{"class":3435},[1291,6491,3699],{"class":3435},[1291,6493,3702],{"class":3435},[1291,6495,5283],{"class":3431},[1291,6497,694],{"class":3435},[1291,6499,5288],{"class":3812},[1291,6501,3816],{"class":3435},[1291,6503,3691],{"class":3435},[1291,6505,5295],{"class":3439},[1291,6507,3691],{"class":3435},[1291,6509,3827],{"class":3435},[3189,6511,6513],{"id":6512},"define-data-sources","Define data sources",[73,6515,6516],{},"The framework can listen to many sources simultaneously, such as local files, S3 folders, cloud storage, and any data stream.",[73,6518,6519,6520,6522],{},"See ",[77,6521,5166],{"href":5165}," for more information.",[73,6524,6525],{},"You can easily connect to the data inside the folder with the framework's file system connector. 
The data will automatically be updated whenever the content of the folder changes.",[3418,6527,6529],{"className":3420,"code":6528,"language":3422,"meta":23,"style":23},"import pathway as pw\n\ndata_sources = []\ndata_sources.append(\n    pw.io.fs.read(\n        \".\u002Fdata\",\n        format=\"binary\",\n        mode=\"streaming\",\n        with_metadata=True,\n    )  # This creates a `pathway` connector that tracks\n    # all the files in the .\u002Fdata directory\n)\n",[3061,6530,6531,6541,6545,6555,6567,6586,6597,6612,6627,6634,6642,6647],{"__ignoreMap":23},[1291,6532,6533,6535,6537,6539],{"class":3427,"line":3428},[1291,6534,3476],{"class":3475},[1291,6536,3533],{"class":3431},[1291,6538,3536],{"class":3475},[1291,6540,3539],{"class":3431},[1291,6542,6543],{"class":3427,"line":24},[1291,6544,3526],{"emptyLinePlaceholder":35},[1291,6546,6547,6550,6552],{"class":3427,"line":675},[1291,6548,6549],{"class":3431},"data_sources ",[1291,6551,3738],{"class":3435},[1291,6553,6554],{"class":3435}," []\n",[1291,6556,6557,6560,6562,6565],{"class":3427,"line":3542},[1291,6558,6559],{"class":3431},"data_sources",[1291,6561,694],{"class":3435},[1291,6563,6564],{"class":3812},"append",[1291,6566,3874],{"class":3435},[1291,6568,6569,6572,6574,6576,6578,6580,6582,6584],{"class":3427,"line":3547},[1291,6570,6571],{"class":3812},"    pw",[1291,6573,694],{"class":3435},[1291,6575,4078],{"class":3457},[1291,6577,694],{"class":3435},[1291,6579,4083],{"class":3457},[1291,6581,694],{"class":3435},[1291,6583,4088],{"class":3812},[1291,6585,3874],{"class":3435},[1291,6587,6588,6591,6593,6595],{"class":3427,"line":3572},[1291,6589,6590],{"class":3435},"        \"",[1291,6592,3928],{"class":3439},[1291,6594,3691],{"class":3435},[1291,6596,4107],{"class":3435},[1291,6598,6599,6602,6604,6606,6608,6610],{"class":3427,"line":3614},[1291,6600,6601],{"class":3819},"        
format",[1291,6603,3738],{"class":3435},[1291,6605,3691],{"class":3435},[1291,6607,4119],{"class":3439},[1291,6609,3691],{"class":3435},[1291,6611,4107],{"class":3435},[1291,6613,6614,6617,6619,6621,6623,6625],{"class":3427,"line":3640},[1291,6615,6616],{"class":3819},"        mode",[1291,6618,3738],{"class":3435},[1291,6620,3691],{"class":3435},[1291,6622,5438],{"class":3439},[1291,6624,3691],{"class":3435},[1291,6626,4107],{"class":3435},[1291,6628,6629,6632],{"class":3427,"line":3665},[1291,6630,6631],{"class":3819},"        with_metadata",[1291,6633,4131],{"class":3435},[1291,6635,6636,6639],{"class":3427,"line":3670},[1291,6637,6638],{"class":3435},"    )",[1291,6640,6641],{"class":3673},"  # This creates a `pathway` connector that tracks\n",[1291,6643,6644],{"class":3427,"line":3677},[1291,6645,6646],{"class":3673},"    # all the files in the .\u002Fdata directory\n",[1291,6648,6649],{"class":3427,"line":3877},[1291,6650,3827],{"class":3435},[3189,6652,6654],{"id":6653},"create-the-document-indexing-pipeline","Create the document indexing pipeline",[73,6656,6657,6658,6661],{},"Now that the data is ready, you must create the document indexing pipeline. 
The transformations should be a list of ",[3061,6659,6660],{},"TransformComponent","s ending with an Embedding transformation.",[73,6663,6664,6665,6668,6669,694],{},"First, split the text using ",[3061,6666,6667],{},"TokenTextSplitter",", then embed it with ",[3061,6670,6671],{},"OpenAIEmbedding",[73,6673,6674,6675,694],{},"Finally, you can run the server with ",[3061,6676,4822],{},[3418,6678,6680],{"className":3420,"code":6679,"language":3422,"meta":23,"style":23},"from pathway.xpacks.llm.vector_store import VectorStoreServer\nfrom llama_index.embeddings.openai import OpenAIEmbedding\nfrom llama_index.core.node_parser import TokenTextSplitter\n\nembed_model = OpenAIEmbedding(embed_batch_size=10)\n\ntransformations_example = [\n    TokenTextSplitter(\n        chunk_size=150,\n        chunk_overlap=10,\n        separator=\" \",\n    ),\n    embed_model,\n]\n\nprocessing_pipeline = VectorStoreServer.from_llamaindex_components(\n    *data_sources,\n    transformations=transformations_example,\n)\n\n# Define the Host and port that the pipeline will be on\nPATHWAY_HOST = \"127.0.0.1\"\nPATHWAY_PORT = 8754\n\n# `threaded` runs the pipeline in detached mode, you have to set it to False when running from terminal or container\n# for more information on `with_cache` check out \u002Fdevelopers\u002Fapi-docs\u002Fpersistence-api\nprocessing_pipeline.run_server(\n    host=PATHWAY_HOST, port=PATHWAY_PORT, with_cache=False, 
threaded=True\n)\n",[3061,6681,6682,6704,6725,6746,6750,6772,6776,6786,6793,6805,6816,6829,6834,6841,6845,6849,6865,6873,6885,6889,6893,6898,6911,6921,6926,6932,6938,6950,6982],{"__ignoreMap":23},[1291,6683,6684,6686,6688,6690,6692,6694,6696,6698,6700,6702],{"class":3427,"line":3428},[1291,6685,3550],{"class":3475},[1291,6687,3553],{"class":3431},[1291,6689,694],{"class":3435},[1291,6691,3581],{"class":3431},[1291,6693,694],{"class":3435},[1291,6695,3627],{"class":3431},[1291,6697,694],{"class":3435},[1291,6699,3657],{"class":3431},[1291,6701,3476],{"class":3475},[1291,6703,3662],{"class":3431},[1291,6705,6706,6708,6711,6713,6715,6717,6720,6722],{"class":3427,"line":24},[1291,6707,3550],{"class":3475},[1291,6709,6710],{"class":3431}," llama_index",[1291,6712,694],{"class":3435},[1291,6714,5561],{"class":3431},[1291,6716,694],{"class":3435},[1291,6718,6719],{"class":3431},"openai ",[1291,6721,3476],{"class":3475},[1291,6723,6724],{"class":3431}," OpenAIEmbedding\n",[1291,6726,6727,6729,6731,6733,6736,6738,6741,6743],{"class":3427,"line":675},[1291,6728,3550],{"class":3475},[1291,6730,6710],{"class":3431},[1291,6732,694],{"class":3435},[1291,6734,6735],{"class":3431},"core",[1291,6737,694],{"class":3435},[1291,6739,6740],{"class":3431},"node_parser ",[1291,6742,3476],{"class":3475},[1291,6744,6745],{"class":3431}," TokenTextSplitter\n",[1291,6747,6748],{"class":3427,"line":3542},[1291,6749,3526],{"emptyLinePlaceholder":35},[1291,6751,6752,6755,6757,6760,6762,6765,6767,6770],{"class":3427,"line":3547},[1291,6753,6754],{"class":3431},"embed_model ",[1291,6756,3738],{"class":3435},[1291,6758,6759],{"class":3812}," 
OpenAIEmbedding",[1291,6761,3816],{"class":3435},[1291,6763,6764],{"class":3819},"embed_batch_size",[1291,6766,3738],{"class":3435},[1291,6768,6769],{"class":3451},"10",[1291,6771,3827],{"class":3435},[1291,6773,6774],{"class":3427,"line":3572},[1291,6775,3526],{"emptyLinePlaceholder":35},[1291,6777,6778,6781,6783],{"class":3427,"line":3614},[1291,6779,6780],{"class":3431},"transformations_example ",[1291,6782,3738],{"class":3435},[1291,6784,6785],{"class":3435}," [\n",[1291,6787,6788,6791],{"class":3427,"line":3640},[1291,6789,6790],{"class":3812},"    TokenTextSplitter",[1291,6792,3874],{"class":3435},[1291,6794,6795,6798,6800,6803],{"class":3427,"line":3665},[1291,6796,6797],{"class":3819},"        chunk_size",[1291,6799,3738],{"class":3435},[1291,6801,6802],{"class":3451},"150",[1291,6804,4107],{"class":3435},[1291,6806,6807,6810,6812,6814],{"class":3427,"line":3670},[1291,6808,6809],{"class":3819},"        chunk_overlap",[1291,6811,3738],{"class":3435},[1291,6813,6769],{"class":3451},[1291,6815,4107],{"class":3435},[1291,6817,6818,6821,6823,6825,6827],{"class":3427,"line":3677},[1291,6819,6820],{"class":3819},"        separator",[1291,6822,3738],{"class":3435},[1291,6824,3691],{"class":3435},[1291,6826,3705],{"class":3435},[1291,6828,4107],{"class":3435},[1291,6830,6831],{"class":3427,"line":3877},[1291,6832,6833],{"class":3435},"    ),\n",[1291,6835,6836,6839],{"class":3427,"line":3916},[1291,6837,6838],{"class":3431},"    embed_model",[1291,6840,4107],{"class":3435},[1291,6842,6843],{"class":3427,"line":4519},[1291,6844,5267],{"class":3435},[1291,6846,6847],{"class":3427,"line":6038},[1291,6848,3526],{"emptyLinePlaceholder":35},[1291,6850,6851,6854,6856,6858,6860,6863],{"class":3427,"line":6043},[1291,6852,6853],{"class":3431},"processing_pipeline 
",[1291,6855,3738],{"class":3435},[1291,6857,4575],{"class":3431},[1291,6859,694],{"class":3435},[1291,6861,6862],{"class":3812},"from_llamaindex_components",[1291,6864,3874],{"class":3435},[1291,6866,6867,6869,6871],{"class":3427,"line":6066},[1291,6868,4582],{"class":3435},[1291,6870,6559],{"class":3812},[1291,6872,4107],{"class":3435},[1291,6874,6875,6878,6880,6883],{"class":3427,"line":6078},[1291,6876,6877],{"class":3819},"    transformations",[1291,6879,3738],{"class":3435},[1291,6881,6882],{"class":3812},"transformations_example",[1291,6884,4107],{"class":3435},[1291,6886,6887],{"class":3427,"line":6089},[1291,6888,3827],{"class":3435},[1291,6890,6891],{"class":3427,"line":6124},[1291,6892,3526],{"emptyLinePlaceholder":35},[1291,6894,6895],{"class":3427,"line":6133},[1291,6896,6897],{"class":3673},"# Define the Host and port that the pipeline will be on\n",[1291,6899,6900,6903,6905,6907,6909],{"class":3427,"line":6141},[1291,6901,6902],{"class":3431},"PATHWAY_HOST ",[1291,6904,3738],{"class":3435},[1291,6906,3705],{"class":3435},[1291,6908,5515],{"class":3439},[1291,6910,3746],{"class":3435},[1291,6912,6913,6916,6918],{"class":3427,"line":6151},[1291,6914,6915],{"class":3431},"PATHWAY_PORT ",[1291,6917,3738],{"class":3435},[1291,6919,6920],{"class":3451}," 8754\n",[1291,6922,6924],{"class":3427,"line":6923},24,[1291,6925,3526],{"emptyLinePlaceholder":35},[1291,6927,6929],{"class":3427,"line":6928},25,[1291,6930,6931],{"class":3673},"# `threaded` runs the pipeline in detached mode, you have to set it to False when running from terminal or container\n",[1291,6933,6935],{"class":3427,"line":6934},26,[1291,6936,6937],{"class":3673},"# for more information on `with_cache` check out 
\u002Fdevelopers\u002Fapi-docs\u002Fpersistence-api\n",[1291,6939,6941,6944,6946,6948],{"class":3427,"line":6940},27,[1291,6942,6943],{"class":3431},"processing_pipeline",[1291,6945,694],{"class":3435},[1291,6947,4822],{"class":3812},[1291,6949,3874],{"class":3435},[1291,6951,6953,6956,6958,6961,6963,6965,6967,6970,6972,6974,6977,6979],{"class":3427,"line":6952},28,[1291,6954,6955],{"class":3819},"    host",[1291,6957,3738],{"class":3435},[1291,6959,6960],{"class":3812},"PATHWAY_HOST",[1291,6962,3566],{"class":3435},[1291,6964,4774],{"class":3819},[1291,6966,3738],{"class":3435},[1291,6968,6969],{"class":3812},"PATHWAY_PORT",[1291,6971,3566],{"class":3435},[1291,6973,5602],{"class":3819},[1291,6975,6976],{"class":3435},"=False,",[1291,6978,5642],{"class":3819},[1291,6980,6981],{"class":3435},"=True\n",[1291,6983,6985],{"class":3427,"line":6984},29,[1291,6986,3827],{"class":3435},[73,6988,6989],{},"Awesome! The vector store is now active, you're set to start sending queries.",[3189,6991,6993],{"id":6992},"create-llamindex-retriever-and-create-query-engine","Create LlamIndex Retriever and create Query Engine",[3418,6995,6997],{"className":3420,"code":6996,"language":3422,"meta":23,"style":23},"from llama_index.retrievers.pathway import PathwayRetriever\n\nretriever = PathwayRetriever(host=PATHWAY_HOST, port=PATHWAY_PORT)\nretriever.retrieve(str_or_query_bundle=\"what is pathway\")\n\n\nfrom llama_index.core.query_engine import RetrieverQueryEngine\n\nquery_engine = RetrieverQueryEngine.from_args(\n    retriever,\n)\n\nresponse = query_engine.query(\"What is 
Pathway?\")\nprint(str(response))\n",[3061,6998,6999,7020,7024,7051,7077,7081,7085,7105,7109,7125,7132,7136,7140,7163],{"__ignoreMap":23},[1291,7000,7001,7003,7005,7007,7010,7012,7015,7017],{"class":3427,"line":3428},[1291,7002,3550],{"class":3475},[1291,7004,6710],{"class":3431},[1291,7006,694],{"class":3435},[1291,7008,7009],{"class":3431},"retrievers",[1291,7011,694],{"class":3435},[1291,7013,7014],{"class":3431},"pathway ",[1291,7016,3476],{"class":3475},[1291,7018,7019],{"class":3431}," PathwayRetriever\n",[1291,7021,7022],{"class":3427,"line":24},[1291,7023,3526],{"emptyLinePlaceholder":35},[1291,7025,7026,7028,7030,7033,7035,7037,7039,7041,7043,7045,7047,7049],{"class":3427,"line":675},[1291,7027,5974],{"class":3431},[1291,7029,3738],{"class":3435},[1291,7031,7032],{"class":3812}," PathwayRetriever",[1291,7034,3816],{"class":3435},[1291,7036,4764],{"class":3819},[1291,7038,3738],{"class":3435},[1291,7040,6960],{"class":3812},[1291,7042,3566],{"class":3435},[1291,7044,4774],{"class":3819},[1291,7046,3738],{"class":3435},[1291,7048,6969],{"class":3812},[1291,7050,3827],{"class":3435},[1291,7052,7053,7056,7058,7061,7063,7066,7068,7070,7073,7075],{"class":3427,"line":3542},[1291,7054,7055],{"class":3431},"retriever",[1291,7057,694],{"class":3435},[1291,7059,7060],{"class":3812},"retrieve",[1291,7062,3816],{"class":3435},[1291,7064,7065],{"class":3819},"str_or_query_bundle",[1291,7067,3738],{"class":3435},[1291,7069,3691],{"class":3435},[1291,7071,7072],{"class":3439},"what is 
pathway",[1291,7074,3691],{"class":3435},[1291,7076,3827],{"class":3435},[1291,7078,7079],{"class":3427,"line":3547},[1291,7080,3526],{"emptyLinePlaceholder":35},[1291,7082,7083],{"class":3427,"line":3572},[1291,7084,3526],{"emptyLinePlaceholder":35},[1291,7086,7087,7089,7091,7093,7095,7097,7100,7102],{"class":3427,"line":3614},[1291,7088,3550],{"class":3475},[1291,7090,6710],{"class":3431},[1291,7092,694],{"class":3435},[1291,7094,6735],{"class":3431},[1291,7096,694],{"class":3435},[1291,7098,7099],{"class":3431},"query_engine ",[1291,7101,3476],{"class":3475},[1291,7103,7104],{"class":3431}," RetrieverQueryEngine\n",[1291,7106,7107],{"class":3427,"line":3640},[1291,7108,3526],{"emptyLinePlaceholder":35},[1291,7110,7111,7113,7115,7118,7120,7123],{"class":3427,"line":3665},[1291,7112,7099],{"class":3431},[1291,7114,3738],{"class":3435},[1291,7116,7117],{"class":3431}," RetrieverQueryEngine",[1291,7119,694],{"class":3435},[1291,7121,7122],{"class":3812},"from_args",[1291,7124,3874],{"class":3435},[1291,7126,7127,7130],{"class":3427,"line":3670},[1291,7128,7129],{"class":3812},"    retriever",[1291,7131,4107],{"class":3435},[1291,7133,7134],{"class":3427,"line":3677},[1291,7135,3827],{"class":3435},[1291,7137,7138],{"class":3427,"line":3877},[1291,7139,3526],{"emptyLinePlaceholder":35},[1291,7141,7142,7144,7146,7149,7151,7153,7155,7157,7159,7161],{"class":3427,"line":3916},[1291,7143,4960],{"class":3431},[1291,7145,3738],{"class":3435},[1291,7147,7148],{"class":3431}," 
query_engine",[1291,7150,694],{"class":3435},[1291,7152,5803],{"class":3812},[1291,7154,3816],{"class":3435},[1291,7156,3691],{"class":3435},[1291,7158,5782],{"class":3439},[1291,7160,3691],{"class":3435},[1291,7162,3827],{"class":3435},[1291,7164,7165,7167,7169,7172,7174,7176],{"class":3427,"line":4519},[1291,7166,4986],{"class":3812},[1291,7168,3816],{"class":3435},[1291,7170,7171],{"class":6356},"str",[1291,7173,3816],{"class":3435},[1291,7175,4991],{"class":3812},[1291,7177,7178],{"class":3435},"))\n",[3418,7180,7183],{"className":7181,"code":7182,"language":4999},[4997],"Out[]: Empty Response\n",[3061,7184,7182],{"__ignoreMap":23},[73,7186,7187],{},"As you can see, the LLM cannot respond clearly as it lacks current knowledge, but this is where Pathway shines. Add new data to the folder Pathway is listening to, then ask our agent again to see how it responds.",[73,7189,7190,7191,7193],{},"To do that, you can download the repo readme of Pathway into our ",[3061,7192,3935],{}," folder:",[3418,7195,7197],{"className":6347,"code":7196,"language":6349,"meta":23,"style":23},"wget 'https:\u002F\u002Fraw.githubusercontent.com\u002Fpathwaycom\u002Fpathway\u002Fmain\u002FREADME.md' -O 'data\u002Fpathway_readme.md'\n",[3061,7198,7199],{"__ignoreMap":23},[1291,7200,7201,7204,7206,7208,7210,7213,7215,7217],{"class":3427,"line":3428},[1291,7202,7203],{"class":6356},"wget",[1291,7205,6415],{"class":3435},[1291,7207,5833],{"class":3439},[1291,7209,3436],{"class":3435},[1291,7211,7212],{"class":3439}," -O",[1291,7214,6415],{"class":3435},[1291,7216,5846],{"class":3439},[1291,7218,5188],{"class":3435},[73,7220,5860],{},[3418,7222,7224],{"className":3420,"code":7223,"language":3422,"meta":23,"style":23},"response = query_engine.query(\"What is 
Pathway?\")\nprint(str(response))\n",[3061,7225,7226,7248],{"__ignoreMap":23},[1291,7227,7228,7230,7232,7234,7236,7238,7240,7242,7244,7246],{"class":3427,"line":3428},[1291,7229,4960],{"class":3431},[1291,7231,3738],{"class":3435},[1291,7233,7148],{"class":3431},[1291,7235,694],{"class":3435},[1291,7237,5803],{"class":3812},[1291,7239,3816],{"class":3435},[1291,7241,3691],{"class":3435},[1291,7243,5782],{"class":3439},[1291,7245,3691],{"class":3435},[1291,7247,3827],{"class":3435},[1291,7249,7250,7252,7254,7256,7258,7260],{"class":3427,"line":24},[1291,7251,4986],{"class":3812},[1291,7253,3816],{"class":3435},[1291,7255,7171],{"class":6356},[1291,7257,3816],{"class":3435},[1291,7259,4991],{"class":3812},[1291,7261,7178],{"class":3435},[3418,7263,7266],{"className":7264,"code":7265,"language":4999},[4997],"Out[]: Pathway is a Python framework that allows for high-throughput and low-latency real-time data processing...\n",[3061,7267,7265],{"__ignoreMap":23},[73,7269,7270],{},"As you can see, after downloading the document to the folder the framework is listening to, changes are reflected to the query engine immediately.\nLLM responses are up to date with the latest changes in the documents which would require extra ETL steps in regular Vector DBs.",[140,7272,5008],{"id":5007},[73,7274,7275],{},"With the integration of Pathway Live Data Framework within LlamaIndex, you can now access up-to-date documents for your RAG applications from LlamaIndex.\nYou should now be able to use the Pathway Reader and Retriever to connect to your data sources and monitor for changes, providing always up-to-date documents for your LlamaIndex application.",[73,7277,7278,7279,7283,7284,3126,7289,694],{},"If you are interested in building RAG solutions with Pathway Live Data Framework, don't hesitate to read ",[77,7280,7282],{"href":7281},"\u002Fdevelopers\u002Fuser-guide\u002Fllm-xpack\u002Fdocs-indexing","how the document store pipeline is built",".\nTo learn more about the possibilities 
of combining the live indexing pipeline and LLMs, check out ",[77,7285,7288],{"href":7286,"rel":7287},"https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fllm-app\u002Ftree\u002Fmain\u002Ftemplates\u002Fdrive_alert",[81],"real-time RAG alerting with Pathway",[77,7290,7292],{"href":7291},"\u002Fdevelopers\u002Ftemplates\u002Frag\u002Funstructured-to-structured","ingesting unstructured data to structured",[7294,7295,7298,7303],"shoutout-banner",{"href":7296,"icon":7297},"https:\u002F\u002Fdiscord.gg\u002Fpathway","ic:baseline-discord",[6061,7299,7300],{"v-slot:title":23},[73,7301,7302],{},"Discuss tricks & tips for RAG",[6061,7304,7305],{"v-slot:description":23},[73,7306,7307],{},"Join our Discord community and dive into discussions on tricks and tips for mastering Retrieval Augmented Generation",[5019,7309,7310],{},"html pre.shiki code .s5Dmg, html code.shiki .s5Dmg{--shiki-default:#FFCB6B}html pre.shiki code .sfyAc, html code.shiki .sfyAc{--shiki-default:#C3E88D}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html pre.shiki code .sAklC, html code.shiki .sAklC{--shiki-default:#89DDFF}html pre.shiki code .s6cf3, html code.shiki .s6cf3{--shiki-default:#89DDFF;--shiki-default-font-style:italic}html pre.shiki code .s0W1g, html code.shiki .s0W1g{--shiki-default:#BABED8}html pre.shiki code .saEQR, html code.shiki .saEQR{--shiki-default:#676E95;--shiki-default-font-style:italic}html pre.shiki code .s-wAU, html code.shiki .s-wAU{--shiki-default:#F07178}html pre.shiki code .sdLwU, html code.shiki .sdLwU{--shiki-default:#82AAFF}html pre.shiki code .s7ZW3, html code.shiki 
.s7ZW3{--shiki-default:#BABED8;--shiki-default-font-style:italic}html pre.shiki code .sx098, html code.shiki .sx098{--shiki-default:#F78C6C}",{"title":23,"searchDepth":24,"depth":24,"links":7312},[7313,7314,7315,7323],{"id":6313,"depth":24,"text":6314},{"id":6320,"depth":24,"text":6321},{"id":5102,"depth":24,"text":5103,"children":7316},[7317,7318,7319,7320,7321,7322],{"id":6343,"depth":675,"text":6344},{"id":6393,"depth":675,"text":6394},{"id":6422,"depth":675,"text":6423},{"id":6512,"depth":675,"text":6513},{"id":6653,"depth":675,"text":6654},{"id":6992,"depth":675,"text":6993},{"id":5007,"depth":24,"text":5008},"Pathway Live Data Framework is now available in LlamaIndex as Reader and Retriever",{"layout":90,"date":7326,"thumbnail":7327,"tags":7329,"keywords":7330,"docker_github_link":7332,"hidden":35},"2024-01-12",{"src":7328},"\u002Fassets\u002Fcontent\u002Fshowcases\u002Fvectorstore\u002Fllamaindexpathway.png",[5047,3627,632,6268],[5049,5050,5051,5052,6296,7331],"docker","https:\u002F\u002Fgithub.com\u002Fpathway-labs\u002Frealtime-indexer-qa-chat\u002Ftree\u002Fmain","\u002Fframework\u002Fblog\u002Fllamaindex-pathway",{"title":6279,"description":7324},{"loc":7333},"framework\u002Fblog\u002F1003.llamaindex-pathway","0czkRh_B2G8Giv5Feq0OZe9CWLdPkNCU0hwQ9u2isfI",{"id":7339,"title":7340,"author":7341,"body":7348,"description":8920,"extension":27,"meta":8921,"navigation":35,"path":8934,"seo":8935,"sitemap":8936,"stem":8937,"__hash__":8938},"content\u002Fframework\u002Fblog\u002F1004.enterprise_rag_sharepoint.md","Real-time Enterprise RAG with SharePoint",{"id":7342,"url":7343,"name":7344,"description":7345,"img":7346,"provider":11,"linkedin":7347},"saksham","saksham-goel","Saksham Goel","Developer Relations 
Engineer","\u002Fassets\u002Fauthors\u002Fsaksham-goel.png","https:\u002F\u002Fwww.linkedin.com\u002Fin\u002Fsakshamgoel650\u002F",{"type":13,"value":7349,"toc":8890},[7350,7354,7357,7360,7366,7370,7373,7376,7379,7382,7386,7389,7400,7404,7407,7411,7415,7418,7426,7429,7433,7436,7441,7445,7448,7456,7459,7463,7466,7477,7481,7484,7492,7496,7499,7514,7518,7552,7555,7562,7565,7572,7576,7579,7620,7624,7627,7638,7641,7650,7654,7657,7666,7673,7683,7689,7692,7699,7706,7718,8247,8250,8297,8304,8307,8318,8446,8452,8458,8469,8477,8483,8511,8515,8519,8522,8539,8542,8554,8558,8561,8576,8579,8593,8596,8600,8604,8607,8616,8619,8708,8711,8715,8718,8758,8761,8788,8791,8795,8798,8806,8809,8813,8852,8856,8859,8863,8877,8887],[68,7351,7353],{"id":7352},"real-time-enterprise-rag-with-sharepoint-and-pathway","Real-time Enterprise RAG with SharePoint and Pathway",[73,7355,7356],{},"Retrieval Augmented Generation (RAG) applications empower you to deliver context-specific answers based on private knowledge bases using LLMs\u002FGen AI.",[73,7358,7359],{},"SharePoint offered via Microsoft 365 is a common data source on which you might want to build your RAG applications. Microsoft SharePoint leverages workflow applications, \"list\" databases, and other web parts and security features to enable business teams to collaborate effectively and is widely used by Microsoft Office users for sharing files in a SharePoint document library.",[73,7361,7362,7363,7365],{},"The ",[77,7364,1279],{"href":711},", on the other hand, is crucial for building Enterprise RAG systems to work with live enterprise data and managing dynamic data sources like Microsoft SharePoint while maintaining high accuracy, real-time synchronization and reliability.",[140,7367,7369],{"id":7368},"what-is-real-time-rag","What is Real-time RAG?",[73,7371,7372],{},"In practical scenarios, files in data repositories are dynamic, i.e., frequently added, deleted, or modified. 
These ongoing changes require real-time synchronization and efficient incremental indexing to ensure the most current information is always available.",[73,7374,7375],{},"Real-time Enterprise RAG Applications stay in permanent sync with your dynamic data sources.",[73,7377,7378],{},"This app template will help you build a Real-time Enterprise RAG application that integrates with Microsoft SharePoint as a data source. Your application will always provide up-to-date knowledge, synchronized with any file insertions, deletions, or changes at any point in time, making your work easier. It avoids the need for constant ETL (Extract, Transform and Load) adjustments for such bound-to-implement considerations.",[73,7380,7381],{},"You can easily run this app template in minutes using Docker containers while ensuring the best practices needed in an enterprise setup.",[140,7383,7385],{"id":7384},"real-time-rag-with-sharepoint","Real-time RAG with SharePoint",[73,7387,7388],{},"Real-time RAG with SharePoint refers to an approach where RAG is integrated with Microsoft SharePoint as the data source, and enhanced by real-time AI capabilities. 
In this setup:",[145,7390,7391,7394,7397],{},[148,7392,7393],{},"Continuously index documents as they're added, changed, or removed in SharePoint.",[148,7395,7396],{},"Securely authenticate and manage documents behind enterprise-grade permissions and certificate-based authentication.",[148,7398,7399],{},"Provide up-to-date answers with minimal latency, thanks to real-time synchronization.",[3206,7401,7403],{"id":7402},"ready-to-elevate-your-rag-with-sharepoint","Ready to Elevate Your RAG with SharePoint?",[73,7405,7406],{},"Discover how Pathway Live Data Framework can streamline your SharePoint integration and drive intelligent document retrieval.",[140,7408,7410],{"id":7409},"features-of-real-time-enterprise-rag-with-sharepoint","Features of Real-time Enterprise RAG with SharePoint",[3189,7412,7414],{"id":7413},"real-time-synchronization","Real-Time Synchronization",[73,7416,7417],{},"Real-time RAG Apps must stay in sync with your data repositories to provide relevant responses.",[145,7419,7420,7423],{},[148,7421,7422],{},"The SharePoint connector supports both static and streaming modes.",[148,7424,7425],{},"Ensures that your app continuously indexes documents from SharePoint, maintaining an up-to-date knowledge base.",[73,7427,7428],{},"Imagine senior executives making strategic decisions based on last month's financial reports or outdated project statuses. This lag in information leads to misinformed decisions, missed opportunities, or significant financial losses. Real-time synchronization ensures your app delivers the most current and accurate information, preventing such scenarios.",[3189,7430,7432],{"id":7431},"detailed-metadata-handling","Detailed Metadata Handling",[73,7434,7435],{},"Enterprise RAG applications include comprehensive metadata such as file paths, modification times, and creation times in the output table. 
This additional context is crucial for effectively tracking and managing documents.",[145,7437,7438],{},[148,7439,7440],{},"The streaming mode ensures that this metadata is always up-to-date.",[3189,7442,7444],{"id":7443},"high-security-with-certificate-based-authentication","High Security with Certificate-Based Authentication",[73,7446,7447],{},"Enterprise workflows must ensure high security and compliance with enterprise standards.",[145,7449,7450,7453],{},[148,7451,7452],{},"The framework's certificate-based authentication future-proofs your system against the potential deprecation of simpler authentication methods by SharePoint.",[148,7454,7455],{},"For enhanced security, locally deployed LLMs can be set up within an isolated environment, like a Faraday cage, that protects against external interference. This setup ensures that sensitive data remains secure and private, adhering to the highest security standards.",[73,7457,7458],{},"While this template uses the OpenAI API as an example, you can easily swap it with private RAG setups using the additional resources provided at the end.",[3189,7460,7462],{"id":7461},"scalable-and-production-ready-deployment","Scalable and Production-Ready Deployment",[73,7464,7465],{},"Enterprise applications handle vast and ever-growing data sources, often increasing as many users within a company work on them.",[145,7467,7468,7471,7474],{},[148,7469,7470],{},"Pathway Live Data Framework provides fast, built-in, and persistent vector indexing for up to millions of pages of documents, eliminating the need for complex ETL processes.",[148,7472,7473],{},"Pathway Live Data Framework is built for scale, and it offers an integrated solution where the server and endpoints are part of the same application.",[148,7475,7476],{},"The easy Docker setup ensures consistency across different environments.",[3189,7478,7480],{"id":7479},"high-accuracy-and-enhanced-query-capabilities","High Accuracy and Enhanced Query 
Capabilities",[73,7482,7483],{},"The SharePoint connector allows you to easily query and manage your datasets stored in SharePoint, providing flexible and powerful options for accessing your data.",[145,7485,7486,7489],{},[148,7487,7488],{},"You can configure the connector to read data from specific directories or entire subsites, with options for both recursive and non-recursive scans.",[148,7490,7491],{},"Starting with a basic RAG pipeline provides initial accuracy, but leveraging more advanced methods such as hybrid indexing and multimodal search can significantly increase accuracy.",[140,7493,7495],{"id":7494},"step-by-step-process-to-implement-a-production-ready-rag-with-sharepoint-connector","Step-by-Step Process to Implement a Production-Ready RAG with SharePoint Connector",[73,7497,7498],{},"This template guides you in connecting Pathway Live Data Framework with SharePoint to build a real-time Enterprise RAG app.",[7500,7501,7502],"note",{},[73,7503,7504,7507,7508,7513],{},[169,7505,7506],{},"Important",": SharePoint connector requires a Pathway license key. If you haven’t already, ",[77,7509,7512],{"href":7510,"rel":7511},"https:\u002F\u002Fpathway.com\u002Fget-license",[81],"request your free license key"," to unlock the SharePoint connector and other enterprise features. 
The application will be updated with this key in Step 5.",[3189,7515,7517],{"id":7516},"prerequisites-for-the-enterprise-rag-app-template","Prerequisites for the Enterprise RAG App Template",[665,7519,7520,7528,7543,7549],{},[148,7521,7522,7523,694],{},"Docker Desktop: You can download it from the ",[77,7524,7527],{"href":7525,"rel":7526},"https:\u002F\u002Fwww.docker.com\u002Fproducts\u002Fdocker-desktop\u002F",[81],"Docker website",[148,7529,7530,7531,7536,7537,7542],{},"OpenAI API Key: Sign up on the ",[77,7532,7535],{"href":7533,"rel":7534},"https:\u002F\u002Fwww.openai.com\u002F",[81],"OpenAI website"," and generate an API key from the ",[77,7538,7541],{"href":7539,"rel":7540},"https:\u002F\u002Fplatform.openai.com\u002Faccount\u002Fapi-keys",[81],"API Key Management page",". Keep this key secure as you will need to use it in your configuration.",[148,7544,7545,7546,694],{},"The Pathway Live Data Framework License Key: Get your free license key ",[77,7547,3147],{"href":7548},"\u002Fframework\u002Fget-license",[148,7550,7551],{},"Certificate-Based Authentication Setup for SharePoint Integration",[73,7553,7554],{},"For better security, we use certificate-based authentication to access data from SharePoint. 
For this we use Azure AD, which is now renamed to Microsoft Entra ID.",[73,7556,7557,7558,694],{},"You can follow the steps in the video below to create and upload your SSL certificate to obtain necessary parameters for ",[77,7559,7561],{"href":7560},"\u002Fdevelopers\u002Fapi-docs\u002Fpathway-xpacks-sharepoint","Pathway's SharePoint connector",[126,7563],{"src":7564},"https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=9ks6zhAPAz4",[73,7566,7567,7568,7571],{},"Once done, you will use these parameters to update the ",[3061,7569,7570],{},"app.yaml"," file to successfully build and deploy your Real-time Enterprise RAG with Microsoft SharePoint and Pathway.",[3189,7573,7575],{"id":7574},"components-of-your-real-time-rag-pipeline","Components of your Real-time RAG Pipeline",[73,7577,7578],{},"This folder contains several objects:",[145,7580,7581,7587,7592,7602,7608,7614],{},[148,7582,7583,7586],{},[3061,7584,7585],{},"app.py",", the application code written in Python using Pathway Live Data Framework;",[148,7588,7589,7591],{},[3061,7590,7570],{},", the file containing configuration of the pipeline, like LLM models, data sources or server address;",[148,7593,7594,7597,7598,7601],{},[3061,7595,7596],{},"requirements.txt",", the dependencies for the pipeline. 
It can be passed to ",[3061,7599,7600],{},"pip install -r ..."," to install everything that is needed to launch the pipeline locally;",[148,7603,7604,7607],{},[3061,7605,7606],{},"Dockerfile",", the Docker configuration for running the pipeline in the container;",[148,7609,7610,7613],{},[3061,7611,7612],{},".env",", a short environment variables configuration file where the OpenAI key must be stored;",[148,7615,7616,7619],{},[3061,7617,7618],{},"ui\u002F",", a simple ui written in Streamlit for asking questions.",[3189,7621,7623],{"id":7622},"step-1-clone-the-pathway-live-data-framework-llm-app-repository","Step 1: Clone the Pathway Live Data Framework LLM App Repository",[73,7625,7626],{},"Clone the llm-app repository from GitHub. This repository contains all the files you’ll need.",[3418,7628,7632],{"className":7629,"code":7630,"language":7631,"meta":23,"style":23},"language-Bash shiki shiki-themes material-theme-palenight","git clone https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fllm-app.git\n","Bash",[3061,7633,7634],{"__ignoreMap":23},[1291,7635,7636],{"class":3427,"line":3428},[1291,7637,7630],{},[73,7639,7640],{},"If you have previously cloned an older version, update it using a pull command.",[3418,7642,7644],{"className":7629,"code":7643,"language":7631,"meta":23,"style":23},"git pull\n",[3061,7645,7646],{"__ignoreMap":23},[1291,7647,7648],{"class":3427,"line":3428},[1291,7649,7643],{},[3189,7651,7653],{"id":7652},"step-2-navigate-to-the-question-answering-rag-directory","Step 2: Navigate to the Question-Answering RAG Directory",[73,7655,7656],{},"Change to the directory where the example is located:",[3418,7658,7660],{"className":7629,"code":7659,"language":7631,"meta":23,"style":23},"cd llm-app\u002Ftemplates\u002Fquestion_answering_rag\n",[3061,7661,7662],{"__ignoreMap":23},[1291,7663,7664],{"class":3427,"line":3428},[1291,7665,7659],{},[3189,7667,7669,7670,7672],{"id":7668},"step-3-create-a-env-file-and-put-your-open-ai-api-key","Step 3: Create a 
",[3061,7671,7612],{}," File and put your Open AI API key",[73,7674,7675,7676,7679,7680,7682],{},"Rename the ",[3061,7677,7678],{},".env.example"," file in the project directory to ",[3061,7681,7612],{}," and update it with your OpenAI API key:",[3418,7684,7687],{"className":7685,"code":7686,"language":4999},[4997],"OPENAI_API_KEY=sk-*******\n",[3061,7688,7686],{"__ignoreMap":23},[73,7690,7691],{},"Save the file after making the changes.",[3189,7693,7695,7696,7698],{"id":7694},"step-4-modify-the-appyaml-file","Step 4: Modify the ",[3061,7697,7570],{}," File",[73,7700,7701,7702,7705],{},"By default, the YAML configuration reads documents from a local data folder.  If files need to be pulled from external repositories—such as SharePoint, Google Drive, or Amazon S3—Pathway Live Data Framework provides seamless integration through dedicated connectors.  In this configuration, the ",[3061,7703,7704],{},"!pw.xpacks.connectors.sharepoint.read"," block replaces the default local source, allowing documents to be directly ingested from SharePoint with metadata enrichment and periodic refresh intervals.",[73,7707,7708,7709,7712,7713,7717],{},"For the LLM service, the configuration uses ",[3061,7710,7711],{},"gpt-3.5-turbo"," by default, but you can switch to OpenAI models like GPT-4 or GPT-4o as needed.  Additionally, Pathway Live Data Framework supports 300+ LLMs through the ",[77,7714,7716],{"href":7715},"\u002Fdevelopers\u002Fuser-guide\u002Fllm-xpack\u002Foverview#what-about-other-models","LiteLLM Class",", offering flexibility in model selection.  
Users can also integrate open-source models hosted locally, providing full control over inference and deployment, ensuring privacy and cost efficiency.",[3418,7719,7723],{"className":7720,"code":7721,"filename":7570,"language":7722,"meta":23,"style":23},"language-yaml shiki shiki-themes material-theme-palenight","$sources:\n  - !pw.xpacks.connectors.sharepoint.read \n    url: $SHAREPOINT_URL\n    tenant: $SHAREPOINT_TENANT\n    client_id: $SHAREPOINT_CLIENT_ID\n    cert_path: sharepointcert.pem\n    thumbprint: $SHAREPOINT_THUMBPRINT\n    root_path: $SHAREPOINT_ROOT\n    with_metadata: true\n    refresh_interval: 30\n\n$llm: !pw.xpacks.llm.llms.OpenAIChat\n  model: \"gpt-4o\"\n  retry_strategy: !pw.udfs.ExponentialBackoffRetryStrategy\n    max_retries: 6\n  cache_strategy: !pw.udfs.DefaultCache {}\n  temperature: 0\n  capacity: 8\n\n$embedder: !pw.xpacks.llm.embedders.OpenAIEmbedder\n  model: \"text-embedding-ada-002\"\n  cache_strategy: !pw.udfs.DefaultCache {}\n\n$splitter: !pw.xpacks.llm.splitters.TokenCountSplitter\n  max_tokens: 400\n\n$parser: !pw.xpacks.llm.parsers.UnstructuredParser\n  cache_strategy: !pw.udfs.DefaultCache {}\n\n$retriever_factory: !pw.stdlib.indexing.BruteForceKnnFactory\n  reserved_space: 1000\n  embedder: $embedder\n  metric: !pw.stdlib.indexing.BruteForceKnnMetricKind.COS\n  dimensions: 1536\n  \n$document_store: !pw.xpacks.llm.document_store.DocumentStore\n  docs: $sources\n  parser: $parser\n  splitter: $splitter\n  retriever_factory: $retriever_factory\n\nquestion_answerer: !pw.xpacks.llm.question_answering.BaseRAGQuestionAnswerer\n  llm: $llm\n  indexer: $document_store\n  # You can set the number of documents to be included as the context of the query\n  # search_topk: 6\n  # You can use your own prompt for querying.\n  # For that set prompt_template to string with `{query}` used as a placeholder for the question,\n  # and `{context}` as a placeholder for context documents.\n  # prompt_template: \"Given these documents: {context}, 
please answer the question: {query}\"\n\n# Change host and port by uncommenting these lines\n# host: \"0.0.0.0\"\n# port: $PATHWAY_PORT\n\n# Cache configuration\n# with_cache: true\n\n# If `terminate_on_error` is true then the program will terminate whenever any error is encountered.\n# Defaults to false, uncomment the following line if you want to set it to true\n# terminate_on_error: true\n","yaml",[3061,7724,7725,7732,7744,7754,7764,7774,7784,7794,7804,7814,7824,7828,7838,7852,7862,7872,7885,7895,7905,7909,7919,7932,7942,7946,7956,7966,7970,7980,7990,7994,8005,8016,8027,8038,8049,8055,8066,8077,8088,8099,8110,8115,8126,8137,8148,8154,8160,8166,8172,8178,8184,8189,8195,8201,8207,8212,8218,8224,8229,8235,8241],{"__ignoreMap":23},[1291,7726,7727,7730],{"class":3427,"line":3428},[1291,7728,7729],{"class":3457},"$sources",[1291,7731,5243],{"class":3435},[1291,7733,7734,7737,7741],{"class":3427,"line":24},[1291,7735,7736],{"class":3435},"  -",[1291,7738,7740],{"class":7739},"sJ14y"," !pw.xpacks.connectors.sharepoint.read",[1291,7742,7743],{"class":3431}," \n",[1291,7745,7746,7749,7751],{"class":3427,"line":675},[1291,7747,7748],{"class":3457},"    url",[1291,7750,4390],{"class":3435},[1291,7752,7753],{"class":3439}," $SHAREPOINT_URL\n",[1291,7755,7756,7759,7761],{"class":3427,"line":3542},[1291,7757,7758],{"class":3457},"    tenant",[1291,7760,4390],{"class":3435},[1291,7762,7763],{"class":3439}," $SHAREPOINT_TENANT\n",[1291,7765,7766,7769,7771],{"class":3427,"line":3547},[1291,7767,7768],{"class":3457},"    client_id",[1291,7770,4390],{"class":3435},[1291,7772,7773],{"class":3439}," $SHAREPOINT_CLIENT_ID\n",[1291,7775,7776,7779,7781],{"class":3427,"line":3572},[1291,7777,7778],{"class":3457},"    cert_path",[1291,7780,4390],{"class":3435},[1291,7782,7783],{"class":3439}," sharepointcert.pem\n",[1291,7785,7786,7789,7791],{"class":3427,"line":3614},[1291,7787,7788],{"class":3457},"    thumbprint",[1291,7790,4390],{"class":3435},[1291,7792,7793],{"class":3439}," 
$SHAREPOINT_THUMBPRINT\n",[1291,7795,7796,7799,7801],{"class":3427,"line":3640},[1291,7797,7798],{"class":3457},"    root_path",[1291,7800,4390],{"class":3435},[1291,7802,7803],{"class":3439}," $SHAREPOINT_ROOT\n",[1291,7805,7806,7808,7810],{"class":3427,"line":3665},[1291,7807,4128],{"class":3457},[1291,7809,4390],{"class":3435},[1291,7811,7813],{"class":7812},"sbqyR"," true\n",[1291,7815,7816,7819,7821],{"class":3427,"line":3670},[1291,7817,7818],{"class":3457},"    refresh_interval",[1291,7820,4390],{"class":3435},[1291,7822,7823],{"class":3451}," 30\n",[1291,7825,7826],{"class":3427,"line":3677},[1291,7827,3526],{"emptyLinePlaceholder":35},[1291,7829,7830,7833,7835],{"class":3427,"line":3877},[1291,7831,7832],{"class":3457},"$llm",[1291,7834,4390],{"class":3435},[1291,7836,7837],{"class":7739}," !pw.xpacks.llm.llms.OpenAIChat\n",[1291,7839,7840,7843,7845,7847,7850],{"class":3427,"line":3916},[1291,7841,7842],{"class":3457},"  model",[1291,7844,4390],{"class":3435},[1291,7846,3705],{"class":3435},[1291,7848,7849],{"class":3439},"gpt-4o",[1291,7851,3746],{"class":3435},[1291,7853,7854,7857,7859],{"class":3427,"line":4519},[1291,7855,7856],{"class":3457},"  retry_strategy",[1291,7858,4390],{"class":3435},[1291,7860,7861],{"class":7739}," !pw.udfs.ExponentialBackoffRetryStrategy\n",[1291,7863,7864,7867,7869],{"class":3427,"line":6038},[1291,7865,7866],{"class":3457},"    max_retries",[1291,7868,4390],{"class":3435},[1291,7870,7871],{"class":3451}," 6\n",[1291,7873,7874,7877,7879,7882],{"class":3427,"line":6043},[1291,7875,7876],{"class":3457},"  cache_strategy",[1291,7878,4390],{"class":3435},[1291,7880,7881],{"class":7739}," !pw.udfs.DefaultCache",[1291,7883,7884],{"class":3435}," {}\n",[1291,7886,7887,7890,7892],{"class":3427,"line":6066},[1291,7888,7889],{"class":3457},"  temperature",[1291,7891,4390],{"class":3435},[1291,7893,7894],{"class":3451}," 0\n",[1291,7896,7897,7900,7902],{"class":3427,"line":6078},[1291,7898,7899],{"class":3457},"  
capacity",[1291,7901,4390],{"class":3435},[1291,7903,7904],{"class":3451}," 8\n",[1291,7906,7907],{"class":3427,"line":6089},[1291,7908,3526],{"emptyLinePlaceholder":35},[1291,7910,7911,7914,7916],{"class":3427,"line":6124},[1291,7912,7913],{"class":3457},"$embedder",[1291,7915,4390],{"class":3435},[1291,7917,7918],{"class":7739}," !pw.xpacks.llm.embedders.OpenAIEmbedder\n",[1291,7920,7921,7923,7925,7927,7930],{"class":3427,"line":6133},[1291,7922,7842],{"class":3457},[1291,7924,4390],{"class":3435},[1291,7926,3705],{"class":3435},[1291,7928,7929],{"class":3439},"text-embedding-ada-002",[1291,7931,3746],{"class":3435},[1291,7933,7934,7936,7938,7940],{"class":3427,"line":6141},[1291,7935,7876],{"class":3457},[1291,7937,4390],{"class":3435},[1291,7939,7881],{"class":7739},[1291,7941,7884],{"class":3435},[1291,7943,7944],{"class":3427,"line":6151},[1291,7945,3526],{"emptyLinePlaceholder":35},[1291,7947,7948,7951,7953],{"class":3427,"line":6923},[1291,7949,7950],{"class":3457},"$splitter",[1291,7952,4390],{"class":3435},[1291,7954,7955],{"class":7739}," !pw.xpacks.llm.splitters.TokenCountSplitter\n",[1291,7957,7958,7961,7963],{"class":3427,"line":6928},[1291,7959,7960],{"class":3457},"  max_tokens",[1291,7962,4390],{"class":3435},[1291,7964,7965],{"class":3451}," 400\n",[1291,7967,7968],{"class":3427,"line":6934},[1291,7969,3526],{"emptyLinePlaceholder":35},[1291,7971,7972,7975,7977],{"class":3427,"line":6940},[1291,7973,7974],{"class":3457},"$parser",[1291,7976,4390],{"class":3435},[1291,7978,7979],{"class":7739}," 
!pw.xpacks.llm.parsers.UnstructuredParser\n",[1291,7981,7982,7984,7986,7988],{"class":3427,"line":6952},[1291,7983,7876],{"class":3457},[1291,7985,4390],{"class":3435},[1291,7987,7881],{"class":7739},[1291,7989,7884],{"class":3435},[1291,7991,7992],{"class":3427,"line":6984},[1291,7993,3526],{"emptyLinePlaceholder":35},[1291,7995,7997,8000,8002],{"class":3427,"line":7996},30,[1291,7998,7999],{"class":3457},"$retriever_factory",[1291,8001,4390],{"class":3435},[1291,8003,8004],{"class":7739}," !pw.stdlib.indexing.BruteForceKnnFactory\n",[1291,8006,8008,8011,8013],{"class":3427,"line":8007},31,[1291,8009,8010],{"class":3457},"  reserved_space",[1291,8012,4390],{"class":3435},[1291,8014,8015],{"class":3451}," 1000\n",[1291,8017,8019,8022,8024],{"class":3427,"line":8018},32,[1291,8020,8021],{"class":3457},"  embedder",[1291,8023,4390],{"class":3435},[1291,8025,8026],{"class":3439}," $embedder\n",[1291,8028,8030,8033,8035],{"class":3427,"line":8029},33,[1291,8031,8032],{"class":3457},"  metric",[1291,8034,4390],{"class":3435},[1291,8036,8037],{"class":7739}," !pw.stdlib.indexing.BruteForceKnnMetricKind.COS\n",[1291,8039,8041,8044,8046],{"class":3427,"line":8040},34,[1291,8042,8043],{"class":3457},"  dimensions",[1291,8045,4390],{"class":3435},[1291,8047,8048],{"class":3451}," 1536\n",[1291,8050,8052],{"class":3427,"line":8051},35,[1291,8053,8054],{"class":3431},"  \n",[1291,8056,8058,8061,8063],{"class":3427,"line":8057},36,[1291,8059,8060],{"class":3457},"$document_store",[1291,8062,4390],{"class":3435},[1291,8064,8065],{"class":7739}," !pw.xpacks.llm.document_store.DocumentStore\n",[1291,8067,8069,8072,8074],{"class":3427,"line":8068},37,[1291,8070,8071],{"class":3457},"  docs",[1291,8073,4390],{"class":3435},[1291,8075,8076],{"class":3439}," $sources\n",[1291,8078,8080,8083,8085],{"class":3427,"line":8079},38,[1291,8081,8082],{"class":3457},"  parser",[1291,8084,4390],{"class":3435},[1291,8086,8087],{"class":3439}," 
$parser\n",[1291,8089,8091,8094,8096],{"class":3427,"line":8090},39,[1291,8092,8093],{"class":3457},"  splitter",[1291,8095,4390],{"class":3435},[1291,8097,8098],{"class":3439}," $splitter\n",[1291,8100,8102,8105,8107],{"class":3427,"line":8101},40,[1291,8103,8104],{"class":3457},"  retriever_factory",[1291,8106,4390],{"class":3435},[1291,8108,8109],{"class":3439}," $retriever_factory\n",[1291,8111,8113],{"class":3427,"line":8112},41,[1291,8114,3526],{"emptyLinePlaceholder":35},[1291,8116,8118,8121,8123],{"class":3427,"line":8117},42,[1291,8119,8120],{"class":3457},"question_answerer",[1291,8122,4390],{"class":3435},[1291,8124,8125],{"class":7739}," !pw.xpacks.llm.question_answering.BaseRAGQuestionAnswerer\n",[1291,8127,8129,8132,8134],{"class":3427,"line":8128},43,[1291,8130,8131],{"class":3457},"  llm",[1291,8133,4390],{"class":3435},[1291,8135,8136],{"class":3439}," $llm\n",[1291,8138,8140,8143,8145],{"class":3427,"line":8139},44,[1291,8141,8142],{"class":3457},"  indexer",[1291,8144,4390],{"class":3435},[1291,8146,8147],{"class":3439}," $document_store\n",[1291,8149,8151],{"class":3427,"line":8150},45,[1291,8152,8153],{"class":3673},"  # You can set the number of documents to be included as the context of the query\n",[1291,8155,8157],{"class":3427,"line":8156},46,[1291,8158,8159],{"class":3673},"  # search_topk: 6\n",[1291,8161,8163],{"class":3427,"line":8162},47,[1291,8164,8165],{"class":3673},"  # You can use your own prompt for querying.\n",[1291,8167,8169],{"class":3427,"line":8168},48,[1291,8170,8171],{"class":3673},"  # For that set prompt_template to string with `{query}` used as a placeholder for the question,\n",[1291,8173,8175],{"class":3427,"line":8174},49,[1291,8176,8177],{"class":3673},"  # and `{context}` as a placeholder for context documents.\n",[1291,8179,8181],{"class":3427,"line":8180},50,[1291,8182,8183],{"class":3673},"  # prompt_template: \"Given these documents: {context}, please answer the question: 
{query}\"\n",[1291,8185,8187],{"class":3427,"line":8186},51,[1291,8188,3526],{"emptyLinePlaceholder":35},[1291,8190,8192],{"class":3427,"line":8191},52,[1291,8193,8194],{"class":3673},"# Change host and port by uncommenting these lines\n",[1291,8196,8198],{"class":3427,"line":8197},53,[1291,8199,8200],{"class":3673},"# host: \"0.0.0.0\"\n",[1291,8202,8204],{"class":3427,"line":8203},54,[1291,8205,8206],{"class":3673},"# port: $PATHWAY_PORT\n",[1291,8208,8210],{"class":3427,"line":8209},55,[1291,8211,3526],{"emptyLinePlaceholder":35},[1291,8213,8215],{"class":3427,"line":8214},56,[1291,8216,8217],{"class":3673},"# Cache configuration\n",[1291,8219,8221],{"class":3427,"line":8220},57,[1291,8222,8223],{"class":3673},"# with_cache: true\n",[1291,8225,8227],{"class":3427,"line":8226},58,[1291,8228,3526],{"emptyLinePlaceholder":35},[1291,8230,8232],{"class":3427,"line":8231},59,[1291,8233,8234],{"class":3673},"# If `terminate_on_error` is true then the program will terminate whenever any error is encountered.\n",[1291,8236,8238],{"class":3427,"line":8237},60,[1291,8239,8240],{"class":3673},"# Defaults to false, uncomment the following line if you want to set it to true\n",[1291,8242,8244],{"class":3427,"line":8243},61,[1291,8245,8246],{"class":3673},"# terminate_on_error: true\n",[73,8248,8249],{},"Mandatory Parameters:",[145,8251,8252,8261,8267,8273,8279,8285,8291],{},[148,8253,8254,8256,8257,694],{},[3061,8255,5680],{},": The SharePoint site URL, including the site's path. 
For example: ",[77,8258,8259],{"href":8259,"rel":8260},"https:\u002F\u002Fcompany.sharepoint.com\u002Fsites\u002FMySite",[81],[148,8262,8263,8266],{},[3061,8264,8265],{},"tenant",": The ID of the SharePoint tenant, typically a GUID.",[148,8268,8269,8272],{},[3061,8270,8271],{},"client_id",": The Client ID of the SharePoint application with the required grants to access the data.",[148,8274,8275,8278],{},[3061,8276,8277],{},"cert_path",": The path to the certificate (typically a .pem file) added to the application for authentication.",[148,8280,8281,8284],{},[3061,8282,8283],{},"thumbprint",": The thumbprint for the specified certificate.",[148,8286,8287,8290],{},[3061,8288,8289],{},"root_path",": The path for a directory or file within the SharePoint space to be read.",[148,8292,8293,8296],{},[3061,8294,8295],{},"refresh_interval",": Time in seconds between scans if the mode is set to \"streaming\".",[73,8298,8299,8300,694],{},"For more details on additional configurations, visit the ",[77,8301,8303],{"href":8302},"\u002Fdevelopers\u002Fapi-docs\u002Fpathway-xpacks-sharepoint#pathway.xpacks.connectors.sharepoint.read","SharePoint Connector page",[73,8305,8306],{},"Example Configuration:",[73,8308,8309,8310,8313,8314,8317],{},"To illustrate the utility of this connector, consider a scenario where you need to access a dataset stored in the ",[3061,8311,8312],{},"Shared Documents\u002FData"," directory of the SharePoint site ",[3061,8315,8316],{},"Datasets",". 
Below is a basic example demonstrating how to configure the connector for reading this dataset in streaming mode:",[3418,8319,8321],{"className":3420,"code":8320,"language":3422,"meta":23,"style":23},"t = pw.xpacks.connectors.sharepoint.read(  \n    url=\"https:\u002F\u002Fcompany.sharepoint.com\u002Fsites\u002FDatasets\",\n    tenant=\"c2efaf1f-8add-4334-b1ca-32776acb61ea\",\n    client_id=\"f521a53a-0b36-4f47-8ef7-60dc07587eb2\",\n    cert_path=\"certificate.pem\",\n    thumbprint=\"33C1B9D17115E848B1E956E54EECAF6E77AB1B35\",\n    root_path=\"Shared Documents\u002FData\",\n)\n",[3061,8322,8323,8353,8368,8383,8398,8413,8428,8442],{"__ignoreMap":23},[1291,8324,8325,8327,8329,8331,8333,8335,8337,8340,8342,8345,8347,8349,8351],{"class":3427,"line":3428},[1291,8326,4798],{"class":3431},[1291,8328,3738],{"class":3435},[1291,8330,4073],{"class":3431},[1291,8332,694],{"class":3435},[1291,8334,3581],{"class":3457},[1291,8336,694],{"class":3435},[1291,8338,8339],{"class":3457},"connectors",[1291,8341,694],{"class":3435},[1291,8343,8344],{"class":3457},"sharepoint",[1291,8346,694],{"class":3435},[1291,8348,4088],{"class":3812},[1291,8350,3816],{"class":3435},[1291,8352,8054],{"class":3812},[1291,8354,8355,8357,8359,8361,8364,8366],{"class":3427,"line":24},[1291,8356,7748],{"class":3819},[1291,8358,3738],{"class":3435},[1291,8360,3691],{"class":3435},[1291,8362,8363],{"class":3439},"https:\u002F\u002Fcompany.sharepoint.com\u002Fsites\u002FDatasets",[1291,8365,3691],{"class":3435},[1291,8367,4107],{"class":3435},[1291,8369,8370,8372,8374,8376,8379,8381],{"class":3427,"line":675},[1291,8371,7758],{"class":3819},[1291,8373,3738],{"class":3435},[1291,8375,3691],{"class":3435},[1291,8377,8378],{"class":3439},"c2efaf1f-8add-4334-b1ca-32776acb61ea",[1291,8380,3691],{"class":3435},[1291,8382,4107],{"class":3435},[1291,8384,8385,8387,8389,8391,8394,8396],{"class":3427,"line":3542},[1291,8386,7768],{"class":3819},[1291,8388,3738],{"class":3435},[1291,8390,3691],{"class":3435},[1291,839
2,8393],{"class":3439},"f521a53a-0b36-4f47-8ef7-60dc07587eb2",[1291,8395,3691],{"class":3435},[1291,8397,4107],{"class":3435},[1291,8399,8400,8402,8404,8406,8409,8411],{"class":3427,"line":3547},[1291,8401,7778],{"class":3819},[1291,8403,3738],{"class":3435},[1291,8405,3691],{"class":3435},[1291,8407,8408],{"class":3439},"certificate.pem",[1291,8410,3691],{"class":3435},[1291,8412,4107],{"class":3435},[1291,8414,8415,8417,8419,8421,8424,8426],{"class":3427,"line":3572},[1291,8416,7788],{"class":3819},[1291,8418,3738],{"class":3435},[1291,8420,3691],{"class":3435},[1291,8422,8423],{"class":3439},"33C1B9D17115E848B1E956E54EECAF6E77AB1B35",[1291,8425,3691],{"class":3435},[1291,8427,4107],{"class":3435},[1291,8429,8430,8432,8434,8436,8438,8440],{"class":3427,"line":3614},[1291,8431,7798],{"class":3819},[1291,8433,3738],{"class":3435},[1291,8435,3691],{"class":3435},[1291,8437,8312],{"class":3439},[1291,8439,3691],{"class":3435},[1291,8441,4107],{"class":3435},[1291,8443,8444],{"class":3427,"line":3640},[1291,8445,3827],{"class":3435},[73,8447,8448,8449,8451],{},"In this setup, the connector targets the ",[3061,8450,8312],{}," directory and recursively scans all subdirectories. This method ensures that no file is overlooked, providing comprehensive access to all pertinent data within the specified path.",[3189,8453,8455,8456],{"id":8454},"step-5-obtain-and-update-the-pathway-live-data-framework-license-key-in-apppy","Step 5: Obtain and Update the Pathway Live Data Framework License Key in ",[3061,8457,7585],{},[73,8459,8460,8461,8464,8465,8468],{},"The Pathway Live Data Framework is an open-source framework that provides core functionalities for free. However, to use ",[169,8462,8463],{},"advanced features like SharePoint connector",", you need a ",[169,8466,8467],{},"Pathway license key",". 
This key unlocks additional enterprise-grade capabilities such as enhanced RAM limits, enterprise connectors (e.g., SharePoint, Delta Table, Iceberg), full persistence and monitoring.",[73,8470,8471,8472,8476],{},"To obtain your free license key, visit ",[77,8473,8475],{"href":7510,"rel":8474},[81],"Pathway Live Data Framework License Key Page"," and follow the instructions.",[73,8478,8479,8480,8482],{},"Once you have the key, update it in ",[3061,8481,7585],{}," by replacing the existing demo key:",[3418,8484,8486],{"className":3420,"code":8485,"language":3422,"meta":23,"style":23},"# Set up license key for using Sharepoint feature\npw.set_license_key(\"demo-license-key-with-telemetry\")\n",[3061,8487,8488,8493],{"__ignoreMap":23},[1291,8489,8490],{"class":3427,"line":3428},[1291,8491,8492],{"class":3673},"# Set up license key for using Sharepoint feature\n",[1291,8494,8495,8497,8499,8501,8503,8505,8507,8509],{"class":3427,"line":24},[1291,8496,3841],{"class":3431},[1291,8498,694],{"class":3435},[1291,8500,3846],{"class":3812},[1291,8502,3816],{"class":3435},[1291,8504,3691],{"class":3435},[1291,8506,3853],{"class":3439},[1291,8508,3691],{"class":3435},[1291,8510,3827],{"class":3435},[3189,8512,8514],{"id":8513},"step-6-running-the-project","Step 6: Running the Project",[3206,8516,8518],{"id":8517},"locally","Locally",[73,8520,8521],{},"If you are using Windows, refer to the Docker instructions in the next section. 
For a local run, first install the dependencies:",[3418,8523,8525],{"className":6347,"code":8524,"language":6349,"meta":23,"style":23},"pip install -r requirements.txt\n",[3061,8526,8527],{"__ignoreMap":23},[1291,8528,8529,8531,8533,8536],{"class":3427,"line":3428},[1291,8530,6357],{"class":6356},[1291,8532,6360],{"class":3439},[1291,8534,8535],{"class":3439}," -r",[1291,8537,8538],{"class":3439}," requirements.txt\n",[73,8540,8541],{},"Then, start the app:",[3418,8543,8545],{"className":6347,"code":8544,"language":6349,"meta":23,"style":23},"python app.py\n",[3061,8546,8547],{"__ignoreMap":23},[1291,8548,8549,8551],{"class":3427,"line":3428},[1291,8550,3422],{"class":6356},[1291,8552,8553],{"class":3439}," app.py\n",[3206,8555,8557],{"id":8556},"with-docker","With Docker",[73,8559,8560],{},"Build the Docker with:",[3418,8562,8564],{"className":6347,"code":8563,"language":6349,"meta":23,"style":23},"docker compose build\n",[3061,8565,8566],{"__ignoreMap":23},[1291,8567,8568,8570,8573],{"class":3427,"line":3428},[1291,8569,7331],{"class":6356},[1291,8571,8572],{"class":3439}," compose",[1291,8574,8575],{"class":3439}," build\n",[73,8577,8578],{},"And, run with:",[3418,8580,8582],{"className":6347,"code":8581,"language":6349,"meta":23,"style":23},"docker compose up\n",[3061,8583,8584],{"__ignoreMap":23},[1291,8585,8586,8588,8590],{"class":3427,"line":3428},[1291,8587,7331],{"class":6356},[1291,8589,8572],{"class":3439},[1291,8591,8592],{"class":3439}," up\n",[73,8594,8595],{},"This will start the pipeline and the ui for asking questions.",[3189,8597,8599],{"id":8598},"step-7-querying-the-pipeline","Step 7: Querying the Pipeline",[3206,8601,8603],{"id":8602},"check-the-indexed-files","Check the Indexed Files",[73,8605,8606],{},"Check if your files in SharePoint are indexed for information retrieval for LLMs. 
To test it, query to get the list of available inputs and associated metadata using curl:",[3418,8608,8610],{"className":7629,"code":8609,"language":7631,"meta":23,"style":23},"curl -X 'POST'   'http:\u002F\u002Flocalhost:8000\u002Fv2\u002Flist_documents'   -H 'accept: *\u002F*'   -H 'Content-Type: application\u002Fjson'\n",[3061,8611,8612],{"__ignoreMap":23},[1291,8613,8614],{"class":3427,"line":3428},[1291,8615,8609],{},[73,8617,8618],{},"This will return the list of files e.g. if you start with this file uploaded on your sharepoint the answer will be as follows:",[3418,8620,8624],{"className":8621,"code":8622,"language":8623,"meta":23,"style":23},"language-json shiki shiki-themes material-theme-palenight","[{\"created_at\": null, \"modified_at\": 1718810417, \"owner\": \"root\", \"path\":\"data\u002FIdeanomicsInc_20160330_10-K_EX-10.26_9512211_EX-10.26_Content License Agreement.pdf\", \"seen_at\": 1718902304}]\n","json",[3061,8625,8626],{"__ignoreMap":23},[1291,8627,8628,8631,8633,8636,8638,8640,8643,8645,8648,8650,8652,8655,8657,8659,8662,8664,8666,8668,8671,8673,8675,8677,8680,8682,8684,8686,8689,8691,8693,8695,8698,8700,8702,8705],{"class":3427,"line":3428},[1291,8629,8630],{"class":3435},"[{",[1291,8632,3691],{"class":3435},[1291,8634,8635],{"class":7739},"created_at",[1291,8637,3691],{"class":3435},[1291,8639,4390],{"class":3435},[1291,8641,8642],{"class":3435}," null,",[1291,8644,3705],{"class":3435},[1291,8646,8647],{"class":7739},"modified_at",[1291,8649,3691],{"class":3435},[1291,8651,4390],{"class":3435},[1291,8653,8654],{"class":3451}," 
1718810417",[1291,8656,3566],{"class":3435},[1291,8658,3705],{"class":3435},[1291,8660,8661],{"class":7739},"owner",[1291,8663,3691],{"class":3435},[1291,8665,4390],{"class":3435},[1291,8667,3705],{"class":3435},[1291,8669,8670],{"class":3439},"root",[1291,8672,3691],{"class":3435},[1291,8674,3566],{"class":3435},[1291,8676,3705],{"class":3435},[1291,8678,8679],{"class":7739},"path",[1291,8681,3691],{"class":3435},[1291,8683,4390],{"class":3435},[1291,8685,3691],{"class":3435},[1291,8687,8688],{"class":3439},"data\u002FIdeanomicsInc_20160330_10-K_EX-10.26_9512211_EX-10.26_Content License Agreement.pdf",[1291,8690,3691],{"class":3435},[1291,8692,3566],{"class":3435},[1291,8694,3705],{"class":3435},[1291,8696,8697],{"class":7739},"seen_at",[1291,8699,3691],{"class":3435},[1291,8701,4390],{"class":3435},[1291,8703,8704],{"class":3451}," 1718902304",[1291,8706,8707],{"class":3435},"}]\n",[73,8709,8710],{},"If you add or remove files from the connected folder, repeat the request to see the updated index. The service logs will display the progress of indexing new and modified files.",[3206,8712,8714],{"id":8713},"ask-a-question","Ask a Question",[73,8716,8717],{},"You can now run the RAG service. Start by asking a simple question. 
For example:",[3418,8719,8721],{"className":7629,"code":8720,"language":7631,"meta":23,"style":23},"curl -X 'POST' \\\n  'http:\u002F\u002F0.0.0.0:8000\u002Fv2\u002Fanswer' \\\n  -H 'accept: *\u002F*' \\\n  -H 'Content-Type: application\u002Fjson' \\\n  -d '{\n  \"prompt\": \"What is the start date of the contract?\"\n}'\n",[3061,8722,8723,8728,8733,8738,8743,8748,8753],{"__ignoreMap":23},[1291,8724,8725],{"class":3427,"line":3428},[1291,8726,8727],{},"curl -X 'POST' \\\n",[1291,8729,8730],{"class":3427,"line":24},[1291,8731,8732],{},"  'http:\u002F\u002F0.0.0.0:8000\u002Fv2\u002Fanswer' \\\n",[1291,8734,8735],{"class":3427,"line":675},[1291,8736,8737],{},"  -H 'accept: *\u002F*' \\\n",[1291,8739,8740],{"class":3427,"line":3542},[1291,8741,8742],{},"  -H 'Content-Type: application\u002Fjson' \\\n",[1291,8744,8745],{"class":3427,"line":3547},[1291,8746,8747],{},"  -d '{\n",[1291,8749,8750],{"class":3427,"line":3572},[1291,8751,8752],{},"  \"prompt\": \"What is the start date of the contract?\"\n",[1291,8754,8755],{"class":3427,"line":3614},[1291,8756,8757],{},"}'\n",[73,8759,8760],{},"This will return the following answer:",[3418,8762,8764],{"className":8621,"code":8763,"language":8623,"meta":23,"style":23},"{\"response\": \"The start date of the contract is December 21, 2015.\"}\n",[3061,8765,8766],{"__ignoreMap":23},[1291,8767,8768,8771,8773,8775,8777,8779,8781,8784,8786],{"class":3427,"line":3428},[1291,8769,8770],{"class":3435},"{",[1291,8772,3691],{"class":3435},[1291,8774,4991],{"class":7739},[1291,8776,3691],{"class":3435},[1291,8778,4390],{"class":3435},[1291,8780,3705],{"class":3435},[1291,8782,8783],{"class":3439},"The start date of the contract is December 21, 2015.",[1291,8785,3691],{"class":3435},[1291,8787,4441],{"class":3435},[73,8789,8790],{},"If the answer is in any of your indexed documents, the pipeline will return the most accurate, up-to-date response—powered by real-time 
AI.",[140,8792,8794],{"id":8793},"conclusions","Conclusions",[73,8796,8797],{},"In this app template, you:",[145,8799,8800,8803],{},[148,8801,8802],{},"Learned about Real-time RAG and key considerations for Enterprise RAG applications.",[148,8804,8805],{},"Successfully created and deployed an Enterprise RAG application using Pathway Live Data Framework with Microsoft SharePoint as a data source.",[73,8807,8808],{},"By leveraging the combined power of Pathway Live Data Framework and Microsoft SharePoint, you built a secure, efficient and scalable Enterprise RAG system tailored to your specific needs. This traditional RAG setup can be refined with rerankers, adaptive RAG, multimodal RAG, and other techniques.",[140,8810,8812],{"id":8811},"additional-resources-on-enterprise-rag","Additional Resources on Enterprise RAG",[145,8814,8815,8832,8842],{},[148,8816,8817,8820,8821,8826,8827,694],{},[169,8818,8819],{},"Slides AI Search",": Set up high accuracy multimodal RAG pipelines for presentations and PDFs on the ",[77,8822,8825],{"href":8823,"rel":8824},"https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fllm-app\u002Ftree\u002Fmain\u002Ftemplates\u002Fslides_ai_search",[81],"Slides AI Search GitHub repo",". This template helps you build a multi-modal search service using GPT-4o with Metadata Extraction and Vector Index. You can also try out the ",[77,8828,8831],{"href":8829,"rel":8830},"https:\u002F\u002Fsales-rag-chat.demo.pathway.com\u002F#search-your-slide-decks",[81],"hosted demo here",[148,8833,8834,8837,8838,694],{},[169,8835,8836],{},"Private RAG with Connected Data Sources using Mistral, Ollama, and Pathway Live Data Framework",": Set up a private RAG pipeline with adaptive retrieval using Pathway Live Data Framework, Mistral, and Ollama. This app template allows you to run the entire application locally while ensuring low costs without compromising on accuracy, making it ideal for production use-cases with sensitive data and explainable AI needs. 
Get started with the ",[77,8839,8841],{"href":8840},"\u002Fblog\u002Fdeepseek-ollama","app template here",[148,8843,8844,8847,8848,694],{},[169,8845,8846],{},"Multimodal RAG for PDFs with Text, Images, and Charts",": This showcase demonstrates how you can launch a MultiModal RAG pipeline that utilizes GPT-4o in the parsing stage. The Pathway Live Data Framework extracts information from unstructured financial documents in your folders, updating results as documents change or new ones arrive. Learn more ",[77,8849,3147],{"href":8850,"rel":8851},"https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fllm-app\u002Ftree\u002Fmain\u002Ftemplates\u002Fmultimodal_rag",[81],[3189,8853,8855],{"id":8854},"are-you-looking-to-build-an-enterprise-rag-app","Are you looking to build an Enterprise RAG app?",[73,8857,8858],{},"Pathway is trusted by industry leaders such as NATO and Intel, and is natively available on both AWS and Azure Marketplaces. If you'd like to explore how Pathway Live Data Framework can support your RAG and Generative AI initiatives, we invite you to schedule a discovery session with our team.",[3189,8860,8862],{"id":8861},"troubleshooting","Troubleshooting",[73,8864,8865,8866,8871,8872,8876],{},"To provide feedback or report a bug, please raise an issue on our ",[77,8867,8870],{"href":8868,"rel":8869},"https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fpathway\u002Fissues",[81],"issue tracker",". 
You can also join the Pathway Discord server (",[77,8873,8875],{"href":3072,"rel":8874},[81],"#get-help",") and let us know how the community can help you.",[7294,8878,8879,8883],{"href":7296,"icon":7297},[6061,8880,8881],{"v-slot:title":23},[73,8882,7302],{},[6061,8884,8885],{"v-slot:description":23},[73,8886,7307],{},[5019,8888,8889],{},"html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html pre.shiki code .s0W1g, html code.shiki .s0W1g{--shiki-default:#BABED8}html pre.shiki code .sAklC, html code.shiki .sAklC{--shiki-default:#89DDFF}html pre.shiki code .s-wAU, html code.shiki .s-wAU{--shiki-default:#F07178}html pre.shiki code .sdLwU, html code.shiki .sdLwU{--shiki-default:#82AAFF}html pre.shiki code .s7ZW3, html code.shiki .s7ZW3{--shiki-default:#BABED8;--shiki-default-font-style:italic}html pre.shiki code .sfyAc, html code.shiki .sfyAc{--shiki-default:#C3E88D}html pre.shiki code .saEQR, html code.shiki .saEQR{--shiki-default:#676E95;--shiki-default-font-style:italic}html pre.shiki code .s5Dmg, html code.shiki .s5Dmg{--shiki-default:#FFCB6B}html pre.shiki code .sJ14y, html code.shiki .sJ14y{--shiki-default:#C792EA}html pre.shiki code .sx098, html code.shiki .sx098{--shiki-default:#F78C6C}html pre.shiki code .sbqyR, html code.shiki 
.sbqyR{--shiki-default:#FF9CAC}",{"title":23,"searchDepth":24,"depth":24,"links":8891},[8892,8893,8894,8901,8915,8916],{"id":7368,"depth":24,"text":7369},{"id":7384,"depth":24,"text":7385},{"id":7409,"depth":24,"text":7410,"children":8895},[8896,8897,8898,8899,8900],{"id":7413,"depth":675,"text":7414},{"id":7431,"depth":675,"text":7432},{"id":7443,"depth":675,"text":7444},{"id":7461,"depth":675,"text":7462},{"id":7479,"depth":675,"text":7480},{"id":7494,"depth":24,"text":7495,"children":8902},[8903,8904,8905,8906,8907,8909,8911,8913,8914],{"id":7516,"depth":675,"text":7517},{"id":7574,"depth":675,"text":7575},{"id":7622,"depth":675,"text":7623},{"id":7652,"depth":675,"text":7653},{"id":7668,"depth":675,"text":8908},"Step 3: Create a .env File and put your Open AI API key",{"id":7694,"depth":675,"text":8910},"Step 4: Modify the app.yaml File",{"id":8454,"depth":675,"text":8912},"Step 5: Obtain and Update the Pathway Live Data Framework License Key in app.py",{"id":8513,"depth":675,"text":8514},{"id":8598,"depth":675,"text":8599},{"id":8793,"depth":24,"text":8794},{"id":8811,"depth":24,"text":8812,"children":8917},[8918,8919],{"id":8854,"depth":675,"text":8855},{"id":8861,"depth":675,"text":8862},"Learn how to build a production-ready, real-time RAG pipeline using Pathway Live Data Framework’s SharePoint connector for live enterprise data",{"aside":35,"layout":90,"thumbnail":8922,"tags":8924,"date":8925,"updatedDate":8926,"keywords":8927,"docker_github_link":8933},{"src":8923,"fit":5044},"\u002Fassets\u002Fcontent\u002Fshowcases\u002Fenterprise_sharepoint_rag\u002FEnterprise_RAG-thumbnail.png",[5047,3627,632,6268],"2024-07-15","2025-03-24",[5049,5050,8928,8929,8930,8931,8932],"Real-time RAG","LiveAI™","Enterprise 
RAG","Docker","SharePoint","https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fllm-app\u002Ftree\u002Fmain\u002Ftemplates\u002Fquestion_answering_rag","\u002Fframework\u002Fblog\u002Fenterprise_rag_sharepoint",{"title":7340,"description":8920},{"loc":8934},"framework\u002Fblog\u002F1004.enterprise_rag_sharepoint","yjB5OLSmn0qMK6hUYGWVKygO_6Cg36MvY1JntQBsXwg",{"id":8940,"title":8941,"author":8942,"body":8948,"description":15669,"extension":27,"meta":15670,"navigation":35,"path":15678,"seo":15679,"sitemap":15680,"stem":15681,"__hash__":15682},"content\u002Fframework\u002Fblog\u002F1005.evaluating-rag.md","Evaluating RAG applications with RAGAS",{"id":8943,"url":8944,"name":8945,"description":8946,"img":10,"provider":11,"linkedin":8947},"berke","berke-can-rizai","Berke Can Rizai","LLM Research Engineer","https:\u002F\u002Fwww.linkedin.com\u002Fin\u002Fberke-can-rizai\u002F",{"type":13,"value":8949,"toc":15648},[8950,8954,8957,8965,8968,8971,8997,9001,9083,9086,9097,9100,9147,9150,9168,9176,9179,9186,9193,9201,9212,9219,9222,9254,9257,9260,9263,9266,9274,9277,9281,9287,9357,9360,9708,9740,9746,9773,9807,9811,9814,9835,9887,9953,10129,10183,10186,10211,10259,10265,10292,10295,10322,10325,10347,10358,10373,10376,10389,10392,10398,10406,10409,10426,10429,10495,11136,11140,11196,11199,11213,11293,11296,11312,11315,11318,11380,11383,11386,11389,11785,11805,11825,11896,11899,11902,11921,11926,12276,12296,12305,12311,12314,12321,12348,12351,12377,12380,12452,12456,12459,12467,12470,12473,12476,12482,12498,12992,13006,13039,13059,13080,13089,13095,13102,13117,13161,13165,13173,13639,13653,13685,13721,13730,13736,13739,13781,13785,14223,14237,14273,14282,14288,14291,14333,14336,14350,14358,14376,14409,14861,14875,14911,14920,14926,14929,14932,14974,14977,14980,14983,15478,15492,15532,15541,15547,15550,15592,15595,15598,15601,15604,15607,15616,15619,15645],[68,8951,8953],{"id":8952},"evaluating-rag-applications-with-ragas","Evaluating RAG Applications with 
RAGAS",[73,8955,8956],{},"Pathway streamlines the process of building RAG applications with always up-to-date knowledge. It empowers you to connect your LLM to live data sources and eliminates the need for separate ETL pipelines for knowledge management.",[73,8958,8959,8960,694],{},"However, simply building and deploying a RAG app isn't enough, and evaluations shouldn't be treated as an afterthought. At Pathway, we rely on frequent evaluation runs to keep our offerings reliable. This also prevents us from introducing any silent bugs into the pipeline.\nThis guide offers a simplified look at how we evaluate our RAG solutions at Pathway. For a detailed view of the full pipeline, including additional evaluation components and logging, check out the ",[77,8961,8964],{"href":8962,"rel":8963},"https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fpathway\u002Ftree\u002Fmain\u002Fintegration_tests\u002Frag_evals",[81],"complete CI workflow",[73,8966,8967],{},"You need to ensure that your RAG application delivers accurate and reliable results with YOUR data. This is where our blog post dives in. 
You will explore RAG evaluations, create synthetic test data if necessary, and learn how to optimize your Pathway Live Data Framework RAG app.",[73,8969,8970],{},"Here's a sneak peek of what we'll cover:",[145,8972,8973,8979,8985,8991],{},[148,8974,8975,8978],{},[169,8976,8977],{},"Essential evaluation metrics",": We'll unpack key metrics used to assess different aspects of your RAG pipeline, including retrieval accuracy, generation quality, and overall system effectiveness.",[148,8980,8981,8984],{},[169,8982,8983],{},"Creating a synthetic dataset",": Build test data based on your files.",[148,8986,8987,8990],{},[169,8988,8989],{},"Tweaking RAGAS to suit your needs",": From modifying metric calculations to modifying the LLM evaluator.",[148,8992,8993,8996],{},[169,8994,8995],{},"Optimizing your Pathway Live Data Framework RAG application",": Discover how to fine-tune your RAG system for optimal performance, tailored to your specific use case and dataset.",[140,8998,9000],{"id":8999},"table-of-contents","Table of contents",[145,9002,9003,9009,9015,9021,9027,9033,9039,9077],{},[148,9004,9005],{},[77,9006,9008],{"href":9007},"#evaluation-metrics","Evaluation Metrics",[148,9010,9011],{},[77,9012,9014],{"href":9013},"#setup-and-installation","Setup and Installation",[148,9016,9017],{},[77,9018,9020],{"href":9019},"#dataset","Dataset",[148,9022,9023],{},[77,9024,9026],{"href":9025},"#synthetic-dataset-creation","Synthetic Dataset Creation",[148,9028,9029],{},[77,9030,9032],{"href":9031},"#launching-the-pathway-rag-app","Launching the Pathway Live Data Framework RAG App",[148,9034,9035],{},[77,9036,9038],{"href":9037},"#evaluate-with-the-dataset","Evaluate with the Dataset",[148,9040,9041,9045],{},[77,9042,9044],{"href":9043},"#improving-the-accuracy","Improving the Accuracy",[145,9046,9047,9053,9059,9065,9071],{},[148,9048,9049],{},[77,9050,9052],{"href":9051},"#hybrid-index","Hybrid 
Index",[148,9054,9055],{},[77,9056,9058],{"href":9057},"#using-a-different-parser","Parser",[148,9060,9061],{},[77,9062,9064],{"href":9063},"#lets-try-the-same-parser-with-the-semantic-search-retriever","Same Parser with Semantic Search",[148,9066,9067],{},[77,9068,9070],{"href":9069},"#changing-the-embedder","Changing the Embedder",[148,9072,9073],{},[77,9074,9076],{"href":9075},"#changing-the-prompt","Changing the Prompt",[148,9078,9079],{},[77,9080,9082],{"href":9081},"#summary-findings","Summary & Findings",[140,9084,9008],{"id":9085},"evaluation-metrics",[73,9087,9088,9089,9092,9093,9096],{},"RAG evaluation metrics can be categorized into two parts, ",[3061,9090,9091],{},"\"retrieval\""," metrics and ",[3061,9094,9095],{},"\"generation\""," metrics. Retrieval metrics are usually found in recommendation or information retrieval domains, whereas generation metrics cover LLM-related topics, including how the LLM makes use of the given context, hallucinations, truthfulness and so on.",[73,9098,9099],{},"Some of the retrieval metrics are:",[145,9101,9102,9112,9122,9135,9141],{},[148,9103,9104,9107,9108,9111],{},[3061,9105,9106],{},"Hit@k",": Measures the proportion of times that the relevant item appears in the top-K retrieved results. This is also referred to as ",[3061,9109,9110],{},"\"Context Recall\"",", assuming there is only one relevant document.",[148,9113,9114,9117,9118,9121],{},[3061,9115,9116],{},"Context Recall",": Focuses on the comprehensiveness of the retrieved context, measuring the proportion of all relevant documents in the corpus that are successfully retrieved. It is formally defined as ",[3061,9119,9120],{},"(Number of Relevant Items Retrieved) \u002F (Total Number of Relevant Items in Corpus)",". In simpler terms, recall tells you \"Of all the relevant documents that could have been retrieved, how many were actually retrieved?\". 
High recall signifies that your retrieval system is good at finding most of the relevant context available.",[148,9123,9124,9127,9128,9131,9132,9134],{},[3061,9125,9126],{},"Context Precision",": This metric focuses on the quality of the retrieved context by measuring the proportion of retrieved documents that are actually relevant to the query. Formally, it is calculated as ",[3061,9129,9130],{},"(Number of Relevant Items Retrieved) \u002F (Total Number of Items Retrieved)",". In contrast to ",[3061,9133,9106],{}," (or \"Context Recall\") which emphasizes retrieving at least one relevant item within the top-K results, precision evaluates the relevance concentration within the retrieved set. Essentially, precision answers: \"Of all documents retrieved, how many were relevant?\".",[148,9136,9137,9140],{},[3061,9138,9139],{},"Mean Reciprocal Rank (MRR)",": Evaluates the ranking of retrieved documents by focusing on the position of the first relevant document in the ranked list. For each query, the Reciprocal Rank (RR) is calculated as 1 \u002F rank, where rank is the position of the first relevant document. If there are no relevant documents in the retrieved list, RR is 0. MRR is then the mean of these reciprocal ranks across a set of queries. Generally, you shouldn't stress about this metric in your RAG application. This is largely because the benefit of having the most relevant context ranked at the top is less critical for LLMs.",[148,9142,9143,9146],{},[3061,9144,9145],{},"Normalized Discounted Cumulative Gain (NDCG)",": A ranking-based metric that evaluates the quality of retrieved results by considering both relevance and position. Unlike Hit@k and MRR, which primarily focus on whether relevant items appear at the top, NDCG assigns higher importance to highly relevant documents appearing earlier in the ranked list. 
This metric can be useful when you have more than one relevant item and their relevance is graded with float labels instead of booleans.",[73,9148,9149],{},"As for the generation metrics:",[145,9151,9152,9158],{},[148,9153,9154,9157],{},[3061,9155,9156],{},"Faithfulness",": Evaluates how grounded the LLM's answer is in the retrieved context. It measures whether the claims in the generated answer are supported by the provided context. It penalizes hallucinations.",[148,9159,9160,9163,9164,9167],{},[3061,9161,9162],{},"Answer correctness",": Factual correctness of the response. Even though this is under the ",[3061,9165,9166],{},"generation"," category, this metric generally covers the whole RAG pipeline.",[73,9169,9170,9171,9175],{},"These are only a small subset of all the metrics; however, we found this set of metrics to be a reliable indicator of overall RAG application performance. If you are curious about the full list of metrics available in RAGAS, check it out ",[77,9172,3147],{"href":9173,"rel":9174},"https:\u002F\u002Fdocs.ragas.io\u002Fen\u002Flatest\u002Fconcepts\u002Fmetrics\u002Favailable_metrics\u002F",[81],"!",[140,9177,9014],{"id":9178},"setup-and-installation",[73,9180,9181,9182,9185],{},"The Magic library is used for detecting file types in the ",[3061,9183,9184],{},"UnstructuredParser"," module.",[73,9187,9188,9189,9192],{},"If you are running this notebook on ",[169,9190,9191],{},"MacOS",", you can install it with:",[9194,9195,9196],"blockquote",{},[73,9197,9198],{},[3061,9199,9200],{},"brew install libmagic",[73,9202,9203,9204,9207,9208,9211],{},"If you are running the notebook on ",[169,9205,9206],{},"colab"," or any ",[169,9209,9210],{},"linux"," environment, you can install it by running:",[9194,9213,9214],{},[73,9215,9216],{},[3061,9217,9218],{},"apt install libmagic1",[73,9220,9221],{},"Install the rest of the dependencies:",[3418,9223,9225],{"className":3420,"code":9224,"language":3422,"meta":23,"style":23},"pip install \"pathway[all]\"\npip install 
ragas\npip install langchain-openai\n",[3061,9226,9227,9239,9244],{"__ignoreMap":23},[1291,9228,9229,9232,9234,9237],{"class":3427,"line":3428},[1291,9230,9231],{"class":3431},"pip install ",[1291,9233,3691],{"class":3435},[1291,9235,9236],{"class":3439},"pathway[all]",[1291,9238,3746],{"class":3435},[1291,9240,9241],{"class":3427,"line":24},[1291,9242,9243],{"class":3431},"pip install ragas\n",[1291,9245,9246,9249,9251],{"class":3427,"line":675},[1291,9247,9248],{"class":3431},"pip install langchain",[1291,9250,3949],{"class":3435},[1291,9252,9253],{"class":3431},"openai\n",[140,9255,9020],{"id":9256},"dataset",[73,9258,9259],{},"Having a representative dataset is crucial for effective evaluations. It is recommended to set aside dedicated time to create a gold-standard dataset that accurately reflects your use case.",[73,9261,9262],{},"To ensure robust evaluation, consider splitting your dataset into validation and test sets. Validation set helps fine-tune the retrieval and generation parameters, allowing for iterative improvements without overfitting to the final benchmark. Test set is kept separate from the tuning process, provides an unbiased measure of the performance, ensuring that optimizations generalize beyond the development phase.",[73,9264,9265],{},"Here are the steps we will follow:",[145,9267,9268,9271],{},[148,9269,9270],{},"Prepare your docs to be in markdown format",[148,9272,9273],{},"Create and save synthetic dataset with RAGAS",[140,9275,9026],{"id":9276},"synthetic-dataset-creation",[3189,9278,9280],{"id":9279},"prepare-the-documents-as-markdown","Prepare the documents as markdown",[73,9282,9283,9284,9286],{},"Here, we will use Pathway parsers to parse our document's content and save it as a markdown. Then, we will create a synthetic dataset based on the file contents with the ",[3061,9285,7849],{},". It is a good idea to create synthetic data with a model that is different than the one in your application. 
This is because LLM's bias will influence the wording, queries, and answers in your dataset. This may introduce unwanted bias in the metrics.",[3418,9288,9290],{"className":3420,"code":9289,"language":3422,"meta":23,"style":23},"import os\nimport getpass\n\nimport pandas as pd\nimport pathway as pw\n\n\nfrom pathway.xpacks.llm import parsers\n\n",[3061,9291,9292,9298,9304,9308,9320,9330,9334,9338],{"__ignoreMap":23},[1291,9293,9294,9296],{"class":3427,"line":3428},[1291,9295,3476],{"class":3475},[1291,9297,3486],{"class":3431},[1291,9299,9300,9302],{"class":3427,"line":24},[1291,9301,3476],{"class":3475},[1291,9303,5209],{"class":3431},[1291,9305,9306],{"class":3427,"line":675},[1291,9307,3526],{"emptyLinePlaceholder":35},[1291,9309,9310,9312,9315,9317],{"class":3427,"line":3542},[1291,9311,3476],{"class":3475},[1291,9313,9314],{"class":3431}," pandas ",[1291,9316,3536],{"class":3475},[1291,9318,9319],{"class":3431}," pd\n",[1291,9321,9322,9324,9326,9328],{"class":3427,"line":3547},[1291,9323,3476],{"class":3475},[1291,9325,3533],{"class":3431},[1291,9327,3536],{"class":3475},[1291,9329,3539],{"class":3431},[1291,9331,9332],{"class":3427,"line":3572},[1291,9333,3526],{"emptyLinePlaceholder":35},[1291,9335,9336],{"class":3427,"line":3614},[1291,9337,3526],{"emptyLinePlaceholder":35},[1291,9339,9340,9342,9344,9346,9348,9350,9352,9354],{"class":3427,"line":3640},[1291,9341,3550],{"class":3475},[1291,9343,3553],{"class":3431},[1291,9345,694],{"class":3435},[1291,9347,3581],{"class":3431},[1291,9349,694],{"class":3435},[1291,9351,3586],{"class":3431},[1291,9353,3476],{"class":3475},[1291,9355,9356],{"class":3431}," parsers\n",[73,9358,9359],{},"Define the helpers to save docs as markdown.\nThis reads the file, parses it, and saves to specified folder with the same filename.",[3418,9361,9363],{"className":3420,"code":9362,"language":3422,"meta":23,"style":23},"async def document_to_markdown(\n    input_path: str, output_folder: str, parser: pw.UDF = 
parsers.UnstructuredParser()\n) -> None:\n    os.makedirs(output_folder, exist_ok=True)\n\n    with open(input_path, \"rb\") as f:\n        file_bytes = f.read()\n        content = await parser.func(file_bytes)\n        file_md = \"\\n\\n\".join([split[0] for split in content])\n\n    md_file_name = os.path.splitext(os.path.basename(input_path))[0] + \".md\"\n\n    with open(f\"{output_folder}{os.path.sep}{md_file_name}\", \"w\") as f:\n        f.write(file_md)\n",[3061,9364,9365,9378,9421,9431,9452,9456,9487,9502,9526,9573,9577,9629,9633,9691],{"__ignoreMap":23},[1291,9366,9367,9370,9373,9376],{"class":3427,"line":3428},[1291,9368,9369],{"class":7739},"async",[1291,9371,9372],{"class":7739}," def",[1291,9374,9375],{"class":3812}," document_to_markdown",[1291,9377,3874],{"class":3435},[1291,9379,9380,9383,9385,9388,9390,9393,9395,9397,9399,9402,9404,9406,9408,9411,9413,9415,9417,9419],{"class":3427,"line":24},[1291,9381,9382],{"class":3819},"    input_path",[1291,9384,4390],{"class":3435},[1291,9386,9387],{"class":6356}," str",[1291,9389,3566],{"class":3435},[1291,9391,9392],{"class":3819}," output_folder",[1291,9394,4390],{"class":3435},[1291,9396,9387],{"class":6356},[1291,9398,3566],{"class":3435},[1291,9400,9401],{"class":3819}," parser",[1291,9403,4390],{"class":3435},[1291,9405,4073],{"class":3431},[1291,9407,694],{"class":3435},[1291,9409,9410],{"class":3457},"UDF",[1291,9412,3702],{"class":3435},[1291,9414,3601],{"class":3431},[1291,9416,694],{"class":3435},[1291,9418,9184],{"class":3812},[1291,9420,4871],{"class":3435},[1291,9422,9423,9425,9428],{"class":3427,"line":675},[1291,9424,713],{"class":3435},[1291,9426,9427],{"class":3435}," ->",[1291,9429,9430],{"class":3435}," 
None:\n",[1291,9432,9433,9435,9437,9440,9442,9445,9447,9450],{"class":3427,"line":3542},[1291,9434,6477],{"class":3431},[1291,9436,694],{"class":3435},[1291,9438,9439],{"class":3812},"makedirs",[1291,9441,3816],{"class":3435},[1291,9443,9444],{"class":3812},"output_folder",[1291,9446,3566],{"class":3435},[1291,9448,9449],{"class":3819}," exist_ok",[1291,9451,5645],{"class":3435},[1291,9453,9454],{"class":3427,"line":3547},[1291,9455,3526],{"emptyLinePlaceholder":35},[1291,9457,9458,9461,9464,9466,9469,9471,9473,9476,9478,9480,9482,9485],{"class":3427,"line":3572},[1291,9459,9460],{"class":3475},"    with",[1291,9462,9463],{"class":3812}," open",[1291,9465,3816],{"class":3435},[1291,9467,9468],{"class":3812},"input_path",[1291,9470,3566],{"class":3435},[1291,9472,3705],{"class":3435},[1291,9474,9475],{"class":3439},"rb",[1291,9477,3691],{"class":3435},[1291,9479,713],{"class":3435},[1291,9481,3506],{"class":3475},[1291,9483,9484],{"class":3431}," f",[1291,9486,5243],{"class":3435},[1291,9488,9489,9492,9494,9496,9498,9500],{"class":3427,"line":3614},[1291,9490,9491],{"class":3431},"        file_bytes ",[1291,9493,3738],{"class":3435},[1291,9495,9484],{"class":3431},[1291,9497,694],{"class":3435},[1291,9499,4088],{"class":3812},[1291,9501,4871],{"class":3435},[1291,9503,9504,9507,9509,9512,9514,9516,9519,9521,9524],{"class":3427,"line":3640},[1291,9505,9506],{"class":3431},"        content ",[1291,9508,3738],{"class":3435},[1291,9510,9511],{"class":3475}," await",[1291,9513,9401],{"class":3431},[1291,9515,694],{"class":3435},[1291,9517,9518],{"class":3812},"func",[1291,9520,3816],{"class":3435},[1291,9522,9523],{"class":3812},"file_bytes",[1291,9525,3827],{"class":3435},[1291,9527,9528,9531,9533,9535,9538,9540,9542,9545,9548,9551,9553,9556,9558,9561,9564,9567,9570],{"class":3427,"line":3665},[1291,9529,9530],{"class":3431},"        file_md 
",[1291,9532,3738],{"class":3435},[1291,9534,3705],{"class":3435},[1291,9536,9537],{"class":3431},"\\n\\n",[1291,9539,3691],{"class":3435},[1291,9541,694],{"class":3435},[1291,9543,9544],{"class":3812},"join",[1291,9546,9547],{"class":3435},"([",[1291,9549,9550],{"class":3812},"split",[1291,9552,3688],{"class":3435},[1291,9554,9555],{"class":3451},"0",[1291,9557,3699],{"class":3435},[1291,9559,9560],{"class":3475}," for",[1291,9562,9563],{"class":3812}," split ",[1291,9565,9566],{"class":3475},"in",[1291,9568,9569],{"class":3812}," content",[1291,9571,9572],{"class":3435},"])\n",[1291,9574,9575],{"class":3427,"line":3670},[1291,9576,3526],{"emptyLinePlaceholder":35},[1291,9578,9579,9582,9584,9586,9588,9590,9592,9595,9597,9599,9601,9603,9605,9608,9610,9612,9615,9617,9619,9622,9624,9627],{"class":3427,"line":3677},[1291,9580,9581],{"class":3431},"    md_file_name ",[1291,9583,3738],{"class":3435},[1291,9585,5236],{"class":3431},[1291,9587,694],{"class":3435},[1291,9589,8679],{"class":3457},[1291,9591,694],{"class":3435},[1291,9593,9594],{"class":3812},"splitext",[1291,9596,3816],{"class":3435},[1291,9598,3680],{"class":3812},[1291,9600,694],{"class":3435},[1291,9602,8679],{"class":3457},[1291,9604,694],{"class":3435},[1291,9606,9607],{"class":3812},"basename",[1291,9609,3816],{"class":3435},[1291,9611,9468],{"class":3812},[1291,9613,9614],{"class":3435},"))[",[1291,9616,9555],{"class":3451},[1291,9618,3699],{"class":3435},[1291,9620,9621],{"class":3435}," 
+",[1291,9623,3705],{"class":3435},[1291,9625,9626],{"class":3439},".md",[1291,9628,3746],{"class":3435},[1291,9630,9631],{"class":3427,"line":3877},[1291,9632,3526],{"emptyLinePlaceholder":35},[1291,9634,9635,9637,9639,9641,9644,9646,9648,9650,9653,9655,9657,9659,9661,9664,9666,9669,9672,9674,9676,9678,9681,9683,9685,9687,9689],{"class":3427,"line":3916},[1291,9636,9460],{"class":3475},[1291,9638,9463],{"class":3812},[1291,9640,3816],{"class":3435},[1291,9642,9643],{"class":7739},"f",[1291,9645,3691],{"class":3439},[1291,9647,8770],{"class":3451},[1291,9649,9444],{"class":3812},[1291,9651,9652],{"class":3451},"}{",[1291,9654,3680],{"class":3812},[1291,9656,694],{"class":3435},[1291,9658,8679],{"class":3457},[1291,9660,694],{"class":3435},[1291,9662,9663],{"class":3457},"sep",[1291,9665,9652],{"class":3451},[1291,9667,9668],{"class":3812},"md_file_name",[1291,9670,9671],{"class":3451},"}",[1291,9673,3691],{"class":3439},[1291,9675,3566],{"class":3435},[1291,9677,3705],{"class":3435},[1291,9679,9680],{"class":3439},"w",[1291,9682,3691],{"class":3435},[1291,9684,713],{"class":3435},[1291,9686,3506],{"class":3475},[1291,9688,9484],{"class":3431},[1291,9690,5243],{"class":3435},[1291,9692,9693,9696,9698,9701,9703,9706],{"class":3427,"line":4519},[1291,9694,9695],{"class":3431},"        f",[1291,9697,694],{"class":3435},[1291,9699,9700],{"class":3812},"write",[1291,9702,3816],{"class":3435},[1291,9704,9705],{"class":3812},"file_md",[1291,9707,3827],{"class":3435},[3418,9709,9711],{"className":3420,"code":9710,"language":3422,"meta":23,"style":23},"MARKDOWN_FOLDER = \".\u002Fmarkdown_docs\"\nINPUT_FOLDER = \".\u002Fdata\"\n",[3061,9712,9713,9727],{"__ignoreMap":23},[1291,9714,9715,9718,9720,9722,9725],{"class":3427,"line":3428},[1291,9716,9717],{"class":3431},"MARKDOWN_FOLDER 
",[1291,9719,3738],{"class":3435},[1291,9721,3705],{"class":3435},[1291,9723,9724],{"class":3439},".\u002Fmarkdown_docs",[1291,9726,3746],{"class":3435},[1291,9728,9729,9732,9734,9736,9738],{"class":3427,"line":24},[1291,9730,9731],{"class":3431},"INPUT_FOLDER ",[1291,9733,3738],{"class":3435},[1291,9735,3705],{"class":3435},[1291,9737,3928],{"class":3439},[1291,9739,3746],{"class":3435},[73,9741,9742,9743,694],{},"Download the Alphabet 10K report as an example PDF. Feel free to skip this step if you want to use your own documents. You will need to copy your documents to the ",[3061,9744,9745],{},"INPUT_FOLDER",[3418,9747,9749],{"className":3420,"code":9748,"language":3422,"meta":23,"style":23},"!wget -P \"$INPUT_FOLDER\" \"https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fllm-app\u002Fblob\u002Fmain\u002Ftemplates\u002Fmultimodal_rag\u002Fdata\u002F20230203_alphabet_10K.pdf\"\n",[3061,9750,9751],{"__ignoreMap":23},[1291,9752,9753,9755,9757,9759,9761,9764,9766,9768,9771],{"class":3427,"line":3428},[1291,9754,3967],{"class":3431},[1291,9756,3949],{"class":3435},[1291,9758,3977],{"class":3431},[1291,9760,3691],{"class":3435},[1291,9762,9763],{"class":3439},"$INPUT_FOLDER",[1291,9765,3691],{"class":3435},[1291,9767,3705],{"class":3435},[1291,9769,9770],{"class":3439},"https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fllm-app\u002Fblob\u002Fmain\u002Ftemplates\u002Fmultimodal_rag\u002Fdata\u002F20230203_alphabet_10K.pdf",[1291,9772,3746],{"class":3435},[3418,9774,9776],{"className":3420,"code":9775,"language":3422,"meta":23,"style":23},"await document_to_markdown(f\"{INPUT_FOLDER}\u002F20230203_alphabet_10K.pdf\", 
MARKDOWN_FOLDER)\n",[3061,9777,9778],{"__ignoreMap":23},[1291,9779,9780,9783,9785,9787,9789,9791,9793,9795,9797,9800,9802,9805],{"class":3427,"line":3428},[1291,9781,9782],{"class":3475},"await",[1291,9784,9375],{"class":3812},[1291,9786,3816],{"class":3435},[1291,9788,9643],{"class":7739},[1291,9790,3691],{"class":3439},[1291,9792,8770],{"class":3451},[1291,9794,9745],{"class":3812},[1291,9796,9671],{"class":3451},[1291,9798,9799],{"class":3439},"\u002F20230203_alphabet_10K.pdf\"",[1291,9801,3566],{"class":3435},[1291,9803,9804],{"class":3812}," MARKDOWN_FOLDER",[1291,9806,3827],{"class":3435},[3189,9808,9810],{"id":9809},"configuring-the-generations-and-creating-the-dataset","Configuring the Generations and Creating the Dataset",[73,9812,9813],{},"Now we are finished with the parsing of the documents, let's create the synthetic dataset with RAGAS.",[3418,9815,9817],{"className":3420,"code":9816,"language":3422,"meta":23,"style":23},"from langchain_community.document_loaders import DirectoryLoader\n",[3061,9818,9819],{"__ignoreMap":23},[1291,9820,9821,9823,9825,9827,9830,9832],{"class":3427,"line":3428},[1291,9822,3550],{"class":3475},[1291,9824,5724],{"class":3431},[1291,9826,694],{"class":3435},[1291,9828,9829],{"class":3431},"document_loaders ",[1291,9831,3476],{"class":3475},[1291,9833,9834],{"class":3431}," DirectoryLoader\n",[3418,9836,9838],{"className":3420,"code":9837,"language":3422,"meta":23,"style":23},"loader = DirectoryLoader(MARKDOWN_FOLDER, glob=\"**\u002F*.md\")\ndocs = loader.load()\n",[3061,9839,9840,9871],{"__ignoreMap":23},[1291,9841,9842,9845,9847,9850,9852,9855,9857,9860,9862,9864,9867,9869],{"class":3427,"line":3428},[1291,9843,9844],{"class":3431},"loader ",[1291,9846,3738],{"class":3435},[1291,9848,9849],{"class":3812}," DirectoryLoader",[1291,9851,3816],{"class":3435},[1291,9853,9854],{"class":3812},"MARKDOWN_FOLDER",[1291,9856,3566],{"class":3435},[1291,9858,9859],{"class":3819}," 
glob",[1291,9861,3738],{"class":3435},[1291,9863,3691],{"class":3435},[1291,9865,9866],{"class":3439},"**\u002F*.md",[1291,9868,3691],{"class":3435},[1291,9870,3827],{"class":3435},[1291,9872,9873,9875,9877,9880,9882,9885],{"class":3427,"line":24},[1291,9874,5789],{"class":3431},[1291,9876,3738],{"class":3435},[1291,9878,9879],{"class":3431}," loader",[1291,9881,694],{"class":3435},[1291,9883,9884],{"class":3812},"load",[1291,9886,4871],{"class":3435},[3418,9888,9890],{"className":3420,"code":9889,"language":3422,"meta":23,"style":23},"os.environ[\"OPENAI_API_KEY\"] = os.getenv(\"OPENAI_API_KEY\") or getpass.getpass(\n    \"Enter your OpenAI API key: \"\n)\n",[3061,9891,9892,9940,9949],{"__ignoreMap":23},[1291,9893,9894,9896,9898,9900,9902,9904,9906,9908,9910,9912,9914,9916,9919,9921,9923,9925,9927,9929,9932,9934,9936,9938],{"class":3427,"line":3428},[1291,9895,3680],{"class":3431},[1291,9897,694],{"class":3435},[1291,9899,3685],{"class":3457},[1291,9901,3688],{"class":3435},[1291,9903,3691],{"class":3435},[1291,9905,5228],{"class":3439},[1291,9907,3691],{"class":3435},[1291,9909,3699],{"class":3435},[1291,9911,3702],{"class":3435},[1291,9913,5236],{"class":3431},[1291,9915,694],{"class":3435},[1291,9917,9918],{"class":3812},"getenv",[1291,9920,3816],{"class":3435},[1291,9922,3691],{"class":3435},[1291,9924,5228],{"class":3439},[1291,9926,3691],{"class":3435},[1291,9928,713],{"class":3435},[1291,9930,9931],{"class":3435}," or",[1291,9933,5283],{"class":3431},[1291,9935,694],{"class":3435},[1291,9937,5288],{"class":3812},[1291,9939,3874],{"class":3435},[1291,9941,9942,9944,9947],{"class":3427,"line":24},[1291,9943,4382],{"class":3435},[1291,9945,9946],{"class":3439},"Enter your OpenAI API key: ",[1291,9948,3746],{"class":3435},[1291,9950,9951],{"class":3427,"line":675},[1291,9952,3827],{"class":3435},[3418,9954,9956],{"className":3420,"code":9955,"language":3422,"meta":23,"style":23},"from ragas.llms import LangchainLLMWrapper\nfrom ragas.embeddings import 
LangchainEmbeddingsWrapper\nfrom ragas.testset import TestsetGenerator\nfrom langchain_openai import ChatOpenAI\nfrom langchain_openai import OpenAIEmbeddings\n\ngenerator_llm = LangchainLLMWrapper(ChatOpenAI(model=\"gpt-4o\", temperature=0.0))\ngenerator_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())\n\ngenerator = TestsetGenerator(\n    llm=generator_llm,\n    embedding_model=generator_embeddings,\n)\n",[3061,9957,9958,9975,9990,10006,10016,10026,10030,10069,10085,10089,10101,10113,10125],{"__ignoreMap":23},[1291,9959,9960,9962,9965,9967,9970,9972],{"class":3427,"line":3428},[1291,9961,3550],{"class":3475},[1291,9963,9964],{"class":3431}," ragas",[1291,9966,694],{"class":3435},[1291,9968,9969],{"class":3431},"llms ",[1291,9971,3476],{"class":3475},[1291,9973,9974],{"class":3431}," LangchainLLMWrapper\n",[1291,9976,9977,9979,9981,9983,9985,9987],{"class":3427,"line":24},[1291,9978,3550],{"class":3475},[1291,9980,9964],{"class":3431},[1291,9982,694],{"class":3435},[1291,9984,5472],{"class":3431},[1291,9986,3476],{"class":3475},[1291,9988,9989],{"class":3431}," LangchainEmbeddingsWrapper\n",[1291,9991,9992,9994,9996,9998,10001,10003],{"class":3427,"line":675},[1291,9993,3550],{"class":3475},[1291,9995,9964],{"class":3431},[1291,9997,694],{"class":3435},[1291,9999,10000],{"class":3431},"testset ",[1291,10002,3476],{"class":3475},[1291,10004,10005],{"class":3431}," 
TestsetGenerator\n",[1291,10007,10008,10010,10012,10014],{"class":3427,"line":3542},[1291,10009,3550],{"class":3475},[1291,10011,5353],{"class":3431},[1291,10013,3476],{"class":3475},[1291,10015,5965],{"class":3431},[1291,10017,10018,10020,10022,10024],{"class":3427,"line":3547},[1291,10019,3550],{"class":3475},[1291,10021,5353],{"class":3431},[1291,10023,3476],{"class":3475},[1291,10025,5358],{"class":3431},[1291,10027,10028],{"class":3427,"line":3572},[1291,10029,3526],{"emptyLinePlaceholder":35},[1291,10031,10032,10035,10037,10040,10042,10045,10047,10050,10052,10054,10056,10058,10060,10063,10065,10067],{"class":3427,"line":3614},[1291,10033,10034],{"class":3431},"generator_llm ",[1291,10036,3738],{"class":3435},[1291,10038,10039],{"class":3812}," LangchainLLMWrapper",[1291,10041,3816],{"class":3435},[1291,10043,10044],{"class":3812},"ChatOpenAI",[1291,10046,3816],{"class":3435},[1291,10048,10049],{"class":3819},"model",[1291,10051,3738],{"class":3435},[1291,10053,3691],{"class":3435},[1291,10055,7849],{"class":3439},[1291,10057,3691],{"class":3435},[1291,10059,3566],{"class":3435},[1291,10061,10062],{"class":3819}," temperature",[1291,10064,3738],{"class":3435},[1291,10066,4252],{"class":3451},[1291,10068,7178],{"class":3435},[1291,10070,10071,10074,10076,10079,10081,10083],{"class":3427,"line":3640},[1291,10072,10073],{"class":3431},"generator_embeddings ",[1291,10075,3738],{"class":3435},[1291,10077,10078],{"class":3812}," LangchainEmbeddingsWrapper",[1291,10080,3816],{"class":3435},[1291,10082,5113],{"class":3812},[1291,10084,6237],{"class":3435},[1291,10086,10087],{"class":3427,"line":3665},[1291,10088,3526],{"emptyLinePlaceholder":35},[1291,10090,10091,10094,10096,10099],{"class":3427,"line":3670},[1291,10092,10093],{"class":3431},"generator ",[1291,10095,3738],{"class":3435},[1291,10097,10098],{"class":3812}," 
TestsetGenerator",[1291,10100,3874],{"class":3435},[1291,10102,10103,10106,10108,10111],{"class":3427,"line":3677},[1291,10104,10105],{"class":3819},"    llm",[1291,10107,3738],{"class":3435},[1291,10109,10110],{"class":3812},"generator_llm",[1291,10112,4107],{"class":3435},[1291,10114,10115,10118,10120,10123],{"class":3427,"line":3877},[1291,10116,10117],{"class":3819},"    embedding_model",[1291,10119,3738],{"class":3435},[1291,10121,10122],{"class":3812},"generator_embeddings",[1291,10124,4107],{"class":3435},[1291,10126,10127],{"class":3427,"line":3916},[1291,10128,3827],{"class":3435},[3418,10130,10132],{"className":3420,"code":10131,"language":3422,"meta":23,"style":23},"# generate the dataset\n\ndataset = generator.generate_with_langchain_docs(\n    docs,\n    testset_size=20,\n)\n",[3061,10133,10134,10139,10143,10160,10167,10179],{"__ignoreMap":23},[1291,10135,10136],{"class":3427,"line":3428},[1291,10137,10138],{"class":3673},"# generate the dataset\n",[1291,10140,10141],{"class":3427,"line":24},[1291,10142,3526],{"emptyLinePlaceholder":35},[1291,10144,10145,10148,10150,10153,10155,10158],{"class":3427,"line":675},[1291,10146,10147],{"class":3431},"dataset ",[1291,10149,3738],{"class":3435},[1291,10151,10152],{"class":3431}," generator",[1291,10154,694],{"class":3435},[1291,10156,10157],{"class":3812},"generate_with_langchain_docs",[1291,10159,3874],{"class":3435},[1291,10161,10162,10165],{"class":3427,"line":3542},[1291,10163,10164],{"class":3812},"    docs",[1291,10166,4107],{"class":3435},[1291,10168,10169,10172,10174,10177],{"class":3427,"line":3547},[1291,10170,10171],{"class":3819},"    testset_size",[1291,10173,3738],{"class":3435},[1291,10175,10176],{"class":3451},"20",[1291,10178,4107],{"class":3435},[1291,10180,10181],{"class":3427,"line":3572},[1291,10182,3827],{"class":3435},[73,10184,10185],{},"Save the dataset into a 
file:",[3418,10187,10189],{"className":3420,"code":10188,"language":3422,"meta":23,"style":23},"dataset.to_jsonl(\"synthetic_dataset.jsonl\")\n",[3061,10190,10191],{"__ignoreMap":23},[1291,10192,10193,10195,10197,10200,10202,10204,10207,10209],{"class":3427,"line":3428},[1291,10194,9256],{"class":3431},[1291,10196,694],{"class":3435},[1291,10198,10199],{"class":3812},"to_jsonl",[1291,10201,3816],{"class":3435},[1291,10203,3691],{"class":3435},[1291,10205,10206],{"class":3439},"synthetic_dataset.jsonl",[1291,10208,3691],{"class":3435},[1291,10210,3827],{"class":3435},[3418,10212,10214],{"className":3420,"code":10213,"language":3422,"meta":23,"style":23},"from ragas import EvaluationDataset, SingleTurnSample\nfrom ragas.testset.synthesizers.testset_schema import Testset\n",[3061,10215,10216,10233],{"__ignoreMap":23},[1291,10217,10218,10220,10223,10225,10228,10230],{"class":3427,"line":3428},[1291,10219,3550],{"class":3475},[1291,10221,10222],{"class":3431}," ragas ",[1291,10224,3476],{"class":3475},[1291,10226,10227],{"class":3431}," EvaluationDataset",[1291,10229,3566],{"class":3435},[1291,10231,10232],{"class":3431}," SingleTurnSample\n",[1291,10234,10235,10237,10239,10241,10244,10246,10249,10251,10254,10256],{"class":3427,"line":24},[1291,10236,3550],{"class":3475},[1291,10238,9964],{"class":3431},[1291,10240,694],{"class":3435},[1291,10242,10243],{"class":3431},"testset",[1291,10245,694],{"class":3435},[1291,10247,10248],{"class":3431},"synthesizers",[1291,10250,694],{"class":3435},[1291,10252,10253],{"class":3431},"testset_schema ",[1291,10255,3476],{"class":3475},[1291,10257,10258],{"class":3431}," Testset\n",[73,10260,10261,10262,4390],{},"If you have a previously saved dataset, you can load it with ",[3061,10263,10264],{},"from_jsonl",[3418,10266,10268],{"className":3420,"code":10267,"language":3422,"meta":23,"style":23},"dataset = 
EvaluationDataset.from_jsonl(\"synthetic_dataset.jsonl\")\n",[3061,10269,10270],{"__ignoreMap":23},[1291,10271,10272,10274,10276,10278,10280,10282,10284,10286,10288,10290],{"class":3427,"line":3428},[1291,10273,10147],{"class":3431},[1291,10275,3738],{"class":3435},[1291,10277,10227],{"class":3431},[1291,10279,694],{"class":3435},[1291,10281,10264],{"class":3812},[1291,10283,3816],{"class":3435},[1291,10285,3691],{"class":3435},[1291,10287,10206],{"class":3439},[1291,10289,3691],{"class":3435},[1291,10291,3827],{"class":3435},[73,10293,10294],{},"You may also download the synthetic dataset we created from the example file:",[3418,10296,10298],{"className":3420,"code":10297,"language":3422,"meta":23,"style":23},"!wget -P \"$MARKDOWN_FOLDER\" \"https:\u002F\u002Fgist.githubusercontent.com\u002Fberkecanrizai\u002F4b036863a57cd6c93c7ca497c93abe2b\u002Fraw\u002F4569e19bfd95fad05885fee32046e0b0d5d9d2cb\u002Fsynthetic_dataset.jsonl\"\n",[3061,10299,10300],{"__ignoreMap":23},[1291,10301,10302,10304,10306,10308,10310,10313,10315,10317,10320],{"class":3427,"line":3428},[1291,10303,3967],{"class":3431},[1291,10305,3949],{"class":3435},[1291,10307,3977],{"class":3431},[1291,10309,3691],{"class":3435},[1291,10311,10312],{"class":3439},"$MARKDOWN_FOLDER",[1291,10314,3691],{"class":3435},[1291,10316,3705],{"class":3435},[1291,10318,10319],{"class":3439},"https:\u002F\u002Fgist.githubusercontent.com\u002Fberkecanrizai\u002F4b036863a57cd6c93c7ca497c93abe2b\u002Fraw\u002F4569e19bfd95fad05885fee32046e0b0d5d9d2cb\u002Fsynthetic_dataset.jsonl",[1291,10321,3746],{"class":3435},[73,10323,10324],{},"Have a peek at the 
dataset;",[3418,10326,10328],{"className":3420,"code":10327,"language":3422,"meta":23,"style":23},"dataset.to_pandas().head()\n",[3061,10329,10330],{"__ignoreMap":23},[1291,10331,10332,10334,10336,10339,10342,10345],{"class":3427,"line":3428},[1291,10333,9256],{"class":3431},[1291,10335,694],{"class":3435},[1291,10337,10338],{"class":3812},"to_pandas",[1291,10340,10341],{"class":3435},"().",[1291,10343,10344],{"class":3812},"head",[1291,10346,4871],{"class":3435},[9194,10348,10349],{},[73,10350,10351,10352,10354,10355,694],{},"Notes on repeatability: The scores presented in this notebook are averaged over three independent runs to ensure reliability. Two of these runs used the provided synthetic dataset generated with the ",[3061,10353,9184],{},", while the third run utilized the data from the ",[3061,10356,10357],{},"PypdfParser",[9194,10359,10360],{},[73,10361,10362,10363,10366,10367,10372],{},"We found that LLM based evaluations can vary wildly between the runs. Also note that variables such as the order of the documents, wording of the answer & question pairs, and the LLM can have big impact on these scores. We also found that score variability and reliability is one of the main weaknesses of the ",[3061,10364,10365],{},"RAGAS",". 
We plan to repeat these experiments with ",[77,10368,10371],{"href":10369,"rel":10370},"https:\u002F\u002Fgithub.com\u002Fconfident-ai\u002Fdeepeval",[81],"deepeval"," in the future, stay tuned!",[140,10374,9032],{"id":10375},"launching-the-pathway-live-data-framework-rag-app",[73,10377,10378,10379,3126,10384,10388],{},"Pathway ",[77,10380,10383],{"href":10381,"rel":10382},"https:\u002F\u002Fpathway.com\u002Fdevelopers\u002Fapi-docs\u002Fpathway-xpacks-llm\u002Fdocument_store#pathway.xpacks.llm.document_store.DocumentStore",[81],"DocumentStore",[77,10385,4834],{"href":10386,"rel":10387},"https:\u002F\u002Fpathway.com\u002Fdevelopers\u002Fapi-docs\u002Fpathway-xpacks-llm\u002Fquestion_answering#pathway.xpacks.llm.question_answering.BaseRAGQuestionAnswerer",[81]," provides end to end solution for RAG applications.",[73,10390,10391],{},"DocumentStore manages document ingestion from your data sources, as well as document processing that includes parsing, splitting, and the indexing.",[73,10393,10394,10395,10397],{},"BaseRAGQuestionAnswerer creates a Pathway ",[3061,10396,5050],{}," application that:",[145,10399,10400,10403],{},[148,10401,10402],{},"Indexes the documents (via DocumentStore)",[148,10404,10405],{},"Exposes the question answering endpoints",[73,10407,10408],{},"Let's keep things simple and test a naive RAG solution with the following components:",[145,10410,10411,10414,10417,10420,10423],{},[148,10412,10413],{},"Unstructured Parser",[148,10415,10416],{},"Token based splitter",[148,10418,10419],{},"OpenAI embedder",[148,10421,10422],{},"Hybrid index that combines semantic search and keyword based BM25 search",[148,10424,10425],{},"A barebones RAG prompt",[73,10427,10428],{},"For more information, check out the 
documentation:",[665,10430,10431,10439,10447,10455,10463,10471,10479,10487],{},[148,10432,10433,10438],{},[77,10434,10437],{"href":10435,"rel":10436},"https:\u002F\u002Fpathway.com\u002Fdevelopers\u002Fuser-guide\u002Fconnect\u002Flive-data-framework-connectors",[81],"Connectors",": Use Pathway’s file reader to ingest the files.",[148,10440,10441,10446],{},[77,10442,10445],{"href":10443,"rel":10444},"https:\u002F\u002Fpathway.com\u002Fdevelopers\u002Fapi-docs\u002Fpathway-xpacks-llm\u002Fparsers",[81],"Parsers",": Utilize the UnstructuredParser to parse the documents. This parser supports multiple file types, including PDF, DOCX, and PPTX.",[148,10448,10449,10454],{},[77,10450,10453],{"href":10451,"rel":10452},"https:\u002F\u002Fpathway.com\u002Fdevelopers\u002Fapi-docs\u002Fpathway-xpacks-llm\u002Fsplitters",[81],"Text Splitters",": Split the document content into chunks.",[148,10456,10457,10462],{},[77,10458,10461],{"href":10459,"rel":10460},"https:\u002F\u002Fpathway.com\u002Fdevelopers\u002Fapi-docs\u002Fpathway-xpacks-llm\u002Fembedders",[81],"Embedders",": Use OpenAI API for embeddings.",[148,10464,10465,10470],{},[77,10466,10469],{"href":10467,"rel":10468},"https:\u002F\u002Fpathway.com\u002Fdevelopers\u002Fapi-docs\u002Findexing#pathway.stdlib.indexing.BruteForceKnnFactory",[81],"Vector\u002FKNN Index"," (via BruteForceKnnFactory): Semantic index that is powered by an embedder.",[148,10472,10473,10478],{},[77,10474,10477],{"href":10475,"rel":10476},"https:\u002F\u002Fpathway.com\u002Fdevelopers\u002Fapi-docs\u002Findexing#pathway.stdlib.indexing.TantivyBM25",[81],"BM25"," (via TantivyBM25Factory): Keyword based BM25 search.",[148,10480,10481,10486],{},[77,10482,10485],{"href":10483,"rel":10484},"https:\u002F\u002Fpathway.com\u002Fdevelopers\u002Fapi-docs\u002Findexing#pathway.stdlib.indexing.HybridIndexFactory",[81],"HybridIndexFactory",": combines different indexes to build an hybrid 
index.",[148,10488,10489,10494],{},[77,10490,10493],{"href":10491,"rel":10492},"https:\u002F\u002Fpathway.com\u002Fdevelopers\u002Fapi-docs\u002Fpathway-xpacks-llm\u002Fprompts",[81],"Prompts",": Prompt template for RAG.",[3418,10496,10498],{"className":3420,"code":10497,"language":3422,"meta":23,"style":23},"from pathway.stdlib.indexing import BruteForceKnnFactory, HybridIndexFactory\nfrom pathway.stdlib.indexing.bm25 import TantivyBM25Factory\nfrom pathway.udfs import DiskCache\nfrom pathway.xpacks.llm import embedders, llms, parsers, splitters\nfrom pathway.xpacks.llm.document_store import DocumentStore\nfrom pathway.xpacks.llm.question_answering import BaseRAGQuestionAnswerer, RAGClient\nfrom pathway.xpacks.llm.servers import QASummaryRestServer\n\n\n# read the text files under the data folder, we can also read from Google Drive, Sharepoint, etc.\n# See connectors documentation: https:\u002F\u002Fpathway.com\u002Fdevelopers\u002Fuser-guide\u002Fconnect\u002Flive-data-framework-connectors to learn more\nfolder = pw.io.fs.read(\n    path=INPUT_FOLDER,\n    format=\"binary\",\n    with_metadata=True,\n)\n\n# list of data sources to be indexed\nsources = [folder]\n\n# define the document processing steps\nparser = parsers.UnstructuredParser()\n\ntext_splitter = splitters.TokenCountSplitter(min_tokens=150, max_tokens=450)\n\nembedder = embedders.OpenAIEmbedder(\n    cache_strategy=DiskCache(), retry_strategy=pw.udfs.ExponentialBackoffRetryStrategy()\n)\n\nindex = BruteForceKnnFactory(embedder=embedder)\n\nllm = llms.OpenAIChat(model=\"gpt-4o\", cache_strategy=DiskCache())\n\ndocument_store = DocumentStore(\n    docs=sources, parser=parser, splitter=text_splitter, retriever_factory=index\n)\n\nprompt_template: str = \"\"\"You are an assistant for question-answering tasks. \\\nUse the following pieces of retrieved context to answer the question. 
\\\nIf you don't know the answer, just say that you don't know.\n  Question: {query}\n\n  Context: {context}\n\n  Answer:\"\"\"\n\n# create the RAG app that will power the index, and serve the agent endpoint\nrag_app = BaseRAGQuestionAnswerer(\n    llm=llm,\n    indexer=document_store,\n    prompt_template=prompt_template,\n    search_topk=8,  # number of retrieved chunks for RAG\n)\n",[3061,10499,10500,10526,10551,10566,10596,10620,10646,10670,10674,10678,10683,10688,10710,10720,10734,10740,10744,10748,10753,10765,10769,10774,10788,10792,10827,10831,10846,10877,10881,10885,10904,10908,10944,10948,10959,10994,10998,11002,11022,11029,11034,11042,11046,11053,11057,11064,11068,11073,11084,11094,11106,11117,11132],{"__ignoreMap":23},[1291,10501,10502,10504,10506,10508,10511,10513,10516,10518,10521,10523],{"class":3427,"line":3428},[1291,10503,3550],{"class":3475},[1291,10505,3553],{"class":3431},[1291,10507,694],{"class":3435},[1291,10509,10510],{"class":3431},"stdlib",[1291,10512,694],{"class":3435},[1291,10514,10515],{"class":3431},"indexing ",[1291,10517,3476],{"class":3475},[1291,10519,10520],{"class":3431}," BruteForceKnnFactory",[1291,10522,3566],{"class":3435},[1291,10524,10525],{"class":3431}," HybridIndexFactory\n",[1291,10527,10528,10530,10532,10534,10536,10538,10541,10543,10546,10548],{"class":3427,"line":24},[1291,10529,3550],{"class":3475},[1291,10531,3553],{"class":3431},[1291,10533,694],{"class":3435},[1291,10535,10510],{"class":3431},[1291,10537,694],{"class":3435},[1291,10539,10540],{"class":3431},"indexing",[1291,10542,694],{"class":3435},[1291,10544,10545],{"class":3431},"bm25 ",[1291,10547,3476],{"class":3475},[1291,10549,10550],{"class":3431}," TantivyBM25Factory\n",[1291,10552,10553,10555,10557,10559,10561,10563],{"class":3427,"line":675},[1291,10554,3550],{"class":3475},[1291,10556,3553],{"class":3431},[1291,10558,694],{"class":3435},[1291,10560,3558],{"class":3431},[1291,10562,3476],{"class":3475},[1291,10564,10565],{"class":3431}," 
DiskCache\n",[1291,10567,10568,10570,10572,10574,10576,10578,10580,10582,10584,10586,10588,10590,10592,10594],{"class":3427,"line":3542},[1291,10569,3550],{"class":3475},[1291,10571,3553],{"class":3431},[1291,10573,694],{"class":3435},[1291,10575,3581],{"class":3431},[1291,10577,694],{"class":3435},[1291,10579,3586],{"class":3431},[1291,10581,3476],{"class":3475},[1291,10583,3591],{"class":3431},[1291,10585,3566],{"class":3435},[1291,10587,3596],{"class":3431},[1291,10589,3566],{"class":3435},[1291,10591,3601],{"class":3431},[1291,10593,3566],{"class":3435},[1291,10595,3611],{"class":3431},[1291,10597,10598,10600,10602,10604,10606,10608,10610,10612,10615,10617],{"class":3427,"line":3547},[1291,10599,3550],{"class":3475},[1291,10601,3553],{"class":3431},[1291,10603,694],{"class":3435},[1291,10605,3581],{"class":3431},[1291,10607,694],{"class":3435},[1291,10609,3627],{"class":3431},[1291,10611,694],{"class":3435},[1291,10613,10614],{"class":3431},"document_store ",[1291,10616,3476],{"class":3475},[1291,10618,10619],{"class":3431}," DocumentStore\n",[1291,10621,10622,10624,10626,10628,10630,10632,10634,10636,10638,10640,10642,10644],{"class":3427,"line":3572},[1291,10623,3550],{"class":3475},[1291,10625,3553],{"class":3431},[1291,10627,694],{"class":3435},[1291,10629,3581],{"class":3431},[1291,10631,694],{"class":3435},[1291,10633,3627],{"class":3431},[1291,10635,694],{"class":3435},[1291,10637,3632],{"class":3431},[1291,10639,3476],{"class":3475},[1291,10641,4654],{"class":3431},[1291,10643,3566],{"class":3435},[1291,10645,4899],{"class":3431},[1291,10647,10648,10650,10652,10654,10656,10658,10660,10662,10665,10667],{"class":3427,"line":3614},[1291,10649,3550],{"class":3475},[1291,10651,3553],{"class":3431},[1291,10653,694],{"class":3435},[1291,10655,3581],{"class":3431},[1291,10657,694],{"class":3435},[1291,10659,3627],{"class":3431},[1291,10661,694],{"class":3435},[1291,10663,10664],{"class":3431},"servers 
",[1291,10666,3476],{"class":3475},[1291,10668,10669],{"class":3431}," QASummaryRestServer\n",[1291,10671,10672],{"class":3427,"line":3640},[1291,10673,3526],{"emptyLinePlaceholder":35},[1291,10675,10676],{"class":3427,"line":3665},[1291,10677,3526],{"emptyLinePlaceholder":35},[1291,10679,10680],{"class":3427,"line":3670},[1291,10681,10682],{"class":3673},"# read the text files under the data folder, we can also read from Google Drive, Sharepoint, etc.\n",[1291,10684,10685],{"class":3427,"line":3677},[1291,10686,10687],{"class":3673},"# See connectors documentation: https:\u002F\u002Fpathway.com\u002Fdevelopers\u002Fuser-guide\u002Fconnect\u002Flive-data-framework-connectors to learn more\n",[1291,10689,10690,10692,10694,10696,10698,10700,10702,10704,10706,10708],{"class":3427,"line":3877},[1291,10691,4068],{"class":3431},[1291,10693,3738],{"class":3435},[1291,10695,4073],{"class":3431},[1291,10697,694],{"class":3435},[1291,10699,4078],{"class":3457},[1291,10701,694],{"class":3435},[1291,10703,4083],{"class":3457},[1291,10705,694],{"class":3435},[1291,10707,4088],{"class":3812},[1291,10709,3874],{"class":3435},[1291,10711,10712,10714,10716,10718],{"class":3427,"line":3916},[1291,10713,4095],{"class":3819},[1291,10715,3738],{"class":3435},[1291,10717,9745],{"class":3812},[1291,10719,4107],{"class":3435},[1291,10721,10722,10724,10726,10728,10730,10732],{"class":3427,"line":4519},[1291,10723,4112],{"class":3819},[1291,10725,3738],{"class":3435},[1291,10727,3691],{"class":3435},[1291,10729,4119],{"class":3439},[1291,10731,3691],{"class":3435},[1291,10733,4107],{"class":3435},[1291,10735,10736,10738],{"class":3427,"line":6038},[1291,10737,4128],{"class":3819},[1291,10739,4131],{"class":3435},[1291,10741,10742],{"class":3427,"line":6043},[1291,10743,3827],{"class":3435},[1291,10745,10746],{"class":3427,"line":6066},[1291,10747,3526],{"emptyLinePlaceholder":35},[1291,10749,10750],{"class":3427,"line":6078},[1291,10751,10752],{"class":3673},"# list of data sources to be 
indexed\n",[1291,10754,10755,10757,10759,10761,10763],{"class":3427,"line":6089},[1291,10756,4140],{"class":3431},[1291,10758,3738],{"class":3435},[1291,10760,4145],{"class":3435},[1291,10762,4148],{"class":3431},[1291,10764,5267],{"class":3435},[1291,10766,10767],{"class":3427,"line":6124},[1291,10768,3526],{"emptyLinePlaceholder":35},[1291,10770,10771],{"class":3427,"line":6133},[1291,10772,10773],{"class":3673},"# define the document processing steps\n",[1291,10775,10776,10778,10780,10782,10784,10786],{"class":3427,"line":6141},[1291,10777,4522],{"class":3431},[1291,10779,3738],{"class":3435},[1291,10781,3601],{"class":3431},[1291,10783,694],{"class":3435},[1291,10785,9184],{"class":3812},[1291,10787,4871],{"class":3435},[1291,10789,10790],{"class":3427,"line":6151},[1291,10791,3526],{"emptyLinePlaceholder":35},[1291,10793,10794,10796,10798,10801,10803,10806,10808,10811,10813,10815,10817,10820,10822,10825],{"class":3427,"line":6923},[1291,10795,5370],{"class":3431},[1291,10797,3738],{"class":3435},[1291,10799,10800],{"class":3431}," splitters",[1291,10802,694],{"class":3435},[1291,10804,10805],{"class":3812},"TokenCountSplitter",[1291,10807,3816],{"class":3435},[1291,10809,10810],{"class":3819},"min_tokens",[1291,10812,3738],{"class":3435},[1291,10814,6802],{"class":3451},[1291,10816,3566],{"class":3435},[1291,10818,10819],{"class":3819}," 
max_tokens",[1291,10821,3738],{"class":3435},[1291,10823,10824],{"class":3451},"450",[1291,10826,3827],{"class":3435},[1291,10828,10829],{"class":3427,"line":6928},[1291,10830,3526],{"emptyLinePlaceholder":35},[1291,10832,10833,10835,10837,10839,10841,10844],{"class":3427,"line":6934},[1291,10834,4292],{"class":3431},[1291,10836,3738],{"class":3435},[1291,10838,3591],{"class":3431},[1291,10840,694],{"class":3435},[1291,10842,10843],{"class":3812},"OpenAIEmbedder",[1291,10845,3874],{"class":3435},[1291,10847,10848,10851,10853,10856,10859,10862,10864,10866,10868,10871,10873,10875],{"class":3427,"line":6940},[1291,10849,10850],{"class":3819},"    cache_strategy",[1291,10852,3738],{"class":3435},[1291,10854,10855],{"class":3812},"DiskCache",[1291,10857,10858],{"class":3435},"(),",[1291,10860,10861],{"class":3819}," retry_strategy",[1291,10863,3738],{"class":3435},[1291,10865,3841],{"class":3812},[1291,10867,694],{"class":3435},[1291,10869,10870],{"class":3457},"udfs",[1291,10872,694],{"class":3435},[1291,10874,4219],{"class":3812},[1291,10876,4871],{"class":3435},[1291,10878,10879],{"class":3427,"line":6952},[1291,10880,3827],{"class":3435},[1291,10882,10883],{"class":3427,"line":6984},[1291,10884,3526],{"emptyLinePlaceholder":35},[1291,10886,10887,10890,10892,10894,10896,10898,10900,10902],{"class":3427,"line":7996},[1291,10888,10889],{"class":3431},"index 
",[1291,10891,3738],{"class":3435},[1291,10893,10520],{"class":3812},[1291,10895,3816],{"class":3435},[1291,10897,4597],{"class":3819},[1291,10899,3738],{"class":3435},[1291,10901,4597],{"class":3812},[1291,10903,3827],{"class":3435},[1291,10905,10906],{"class":3427,"line":8007},[1291,10907,3526],{"emptyLinePlaceholder":35},[1291,10909,10910,10912,10914,10916,10918,10921,10923,10925,10927,10929,10931,10933,10935,10938,10940,10942],{"class":3427,"line":8018},[1291,10911,3586],{"class":3431},[1291,10913,3738],{"class":3435},[1291,10915,3596],{"class":3431},[1291,10917,694],{"class":3435},[1291,10919,10920],{"class":3812},"OpenAIChat",[1291,10922,3816],{"class":3435},[1291,10924,10049],{"class":3819},[1291,10926,3738],{"class":3435},[1291,10928,3691],{"class":3435},[1291,10930,7849],{"class":3439},[1291,10932,3691],{"class":3435},[1291,10934,3566],{"class":3435},[1291,10936,10937],{"class":3819}," cache_strategy",[1291,10939,3738],{"class":3435},[1291,10941,10855],{"class":3812},[1291,10943,6237],{"class":3435},[1291,10945,10946],{"class":3427,"line":8029},[1291,10947,3526],{"emptyLinePlaceholder":35},[1291,10949,10950,10952,10954,10957],{"class":3427,"line":8040},[1291,10951,10614],{"class":3431},[1291,10953,3738],{"class":3435},[1291,10955,10956],{"class":3812}," DocumentStore",[1291,10958,3874],{"class":3435},[1291,10960,10961,10963,10965,10967,10969,10971,10973,10975,10977,10979,10981,10984,10986,10989,10991],{"class":3427,"line":8051},[1291,10962,10164],{"class":3819},[1291,10964,3738],{"class":3435},[1291,10966,4585],{"class":3812},[1291,10968,3566],{"class":3435},[1291,10970,9401],{"class":3819},[1291,10972,3738],{"class":3435},[1291,10974,4621],{"class":3812},[1291,10976,3566],{"class":3435},[1291,10978,5566],{"class":3819},[1291,10980,3738],{"class":3435},[1291,10982,10983],{"class":3812},"text_splitter",[1291,10985,3566],{"class":3435},[1291,10987,10988],{"class":3819}," 
retriever_factory",[1291,10990,3738],{"class":3435},[1291,10992,10993],{"class":3812},"index\n",[1291,10995,10996],{"class":3427,"line":8057},[1291,10997,3827],{"class":3435},[1291,10999,11000],{"class":3427,"line":8068},[1291,11001,3526],{"emptyLinePlaceholder":35},[1291,11003,11004,11007,11009,11011,11013,11016,11019],{"class":3427,"line":8079},[1291,11005,11006],{"class":3431},"prompt_template",[1291,11008,4390],{"class":3435},[1291,11010,9387],{"class":6356},[1291,11012,3702],{"class":3435},[1291,11014,11015],{"class":3435}," \"\"\"",[1291,11017,11018],{"class":3439},"You are an assistant for question-answering tasks. ",[1291,11020,11021],{"class":3435},"\\\n",[1291,11023,11024,11027],{"class":3427,"line":8090},[1291,11025,11026],{"class":3439},"Use the following pieces of retrieved context to answer the question. ",[1291,11028,11021],{"class":3435},[1291,11030,11031],{"class":3427,"line":8101},[1291,11032,11033],{"class":3439},"If you don't know the answer, just say that you don't know.\n",[1291,11035,11036,11039],{"class":3427,"line":8112},[1291,11037,11038],{"class":3439},"  Question: ",[1291,11040,11041],{"class":3451},"{query}\n",[1291,11043,11044],{"class":3427,"line":8117},[1291,11045,3526],{"emptyLinePlaceholder":35},[1291,11047,11048,11051],{"class":3427,"line":8128},[1291,11049,11050],{"class":3439},"  Context: ",[1291,11052,6019],{"class":3451},[1291,11054,11055],{"class":3427,"line":8139},[1291,11056,3526],{"emptyLinePlaceholder":35},[1291,11058,11059,11062],{"class":3427,"line":8150},[1291,11060,11061],{"class":3439},"  Answer:",[1291,11063,6035],{"class":3435},[1291,11065,11066],{"class":3427,"line":8156},[1291,11067,3526],{"emptyLinePlaceholder":35},[1291,11069,11070],{"class":3427,"line":8162},[1291,11071,11072],{"class":3673},"# create the RAG app that will power the index, and serve the agent endpoint\n",[1291,11074,11075,11078,11080,11082],{"class":3427,"line":8168},[1291,11076,11077],{"class":3431},"rag_app 
",[1291,11079,3738],{"class":3435},[1291,11081,4654],{"class":3812},[1291,11083,3874],{"class":3435},[1291,11085,11086,11088,11090,11092],{"class":3427,"line":8174},[1291,11087,10105],{"class":3819},[1291,11089,3738],{"class":3435},[1291,11091,3627],{"class":3812},[1291,11093,4107],{"class":3435},[1291,11095,11096,11099,11101,11104],{"class":3427,"line":8180},[1291,11097,11098],{"class":3819},"    indexer",[1291,11100,3738],{"class":3435},[1291,11102,11103],{"class":3812},"document_store",[1291,11105,4107],{"class":3435},[1291,11107,11108,11111,11113,11115],{"class":3427,"line":8186},[1291,11109,11110],{"class":3819},"    prompt_template",[1291,11112,3738],{"class":3435},[1291,11114,11006],{"class":3812},[1291,11116,4107],{"class":3435},[1291,11118,11119,11122,11124,11127,11129],{"class":3427,"line":8191},[1291,11120,11121],{"class":3819},"    search_topk",[1291,11123,3738],{"class":3435},[1291,11125,11126],{"class":3451},"8",[1291,11128,3566],{"class":3435},[1291,11130,11131],{"class":3673},"  # number of retrieved chunks for RAG\n",[1291,11133,11134],{"class":3427,"line":8197},[1291,11135,3827],{"class":3435},[140,11137,11139],{"id":11138},"build-and-run-the-pathway-server","Build and Run the Pathway server",[3418,11141,11143],{"className":3420,"code":11142,"language":3422,"meta":23,"style":23},"import multiprocessing\n\n\n# host and port of the RAG app\npathway_host: str = \"0.0.0.0\"\npathway_port: int = 8000\n",[3061,11144,11145,11152,11156,11160,11165,11182],{"__ignoreMap":23},[1291,11146,11147,11149],{"class":3427,"line":3428},[1291,11148,3476],{"class":3475},[1291,11150,11151],{"class":3431}," multiprocessing\n",[1291,11153,11154],{"class":3427,"line":24},[1291,11155,3526],{"emptyLinePlaceholder":35},[1291,11157,11158],{"class":3427,"line":675},[1291,11159,3526],{"emptyLinePlaceholder":35},[1291,11161,11162],{"class":3427,"line":3542},[1291,11163,11164],{"class":3673},"# host and port of the RAG 
app\n",[1291,11166,11167,11170,11172,11174,11176,11178,11180],{"class":3427,"line":3547},[1291,11168,11169],{"class":3431},"pathway_host",[1291,11171,4390],{"class":3435},[1291,11173,9387],{"class":6356},[1291,11175,3702],{"class":3435},[1291,11177,3705],{"class":3435},[1291,11179,4738],{"class":3439},[1291,11181,3746],{"class":3435},[1291,11183,11184,11187,11189,11192,11194],{"class":3427,"line":3572},[1291,11185,11186],{"class":3431},"pathway_port",[1291,11188,4390],{"class":3435},[1291,11190,11191],{"class":6356}," int",[1291,11193,3702],{"class":3435},[1291,11195,4750],{"class":3451},[73,11197,11198],{},"Once the app starts, it will:",[145,11200,11201,11204,11207,11210],{},[148,11202,11203],{},"Ingest your files",[148,11205,11206],{},"Parse and chunk the documents",[148,11208,11209],{},"Index the chunks",[148,11211,11212],{},"Host the RAG endpoint for question answering",[3418,11214,11216],{"className":3420,"code":11215,"language":3422,"meta":23,"style":23},"server = QASummaryRestServer(pathway_host, pathway_port, rag_app)\n\nserver_process = multiprocessing.Process(target=server.run, kwargs=dict(threaded=False))\n",[3061,11217,11218,11243,11247],{"__ignoreMap":23},[1291,11219,11220,11222,11224,11227,11229,11231,11233,11236,11238,11241],{"class":3427,"line":3428},[1291,11221,5536],{"class":3431},[1291,11223,3738],{"class":3435},[1291,11225,11226],{"class":3812}," QASummaryRestServer",[1291,11228,3816],{"class":3435},[1291,11230,11169],{"class":3812},[1291,11232,3566],{"class":3435},[1291,11234,11235],{"class":3812}," pathway_port",[1291,11237,3566],{"class":3435},[1291,11239,11240],{"class":3812}," rag_app",[1291,11242,3827],{"class":3435},[1291,11244,11245],{"class":3427,"line":24},[1291,11246,3526],{"emptyLinePlaceholder":35},[1291,11248,11249,11252,11254,11257,11259,11262,11264,11266,11268,11270,11272,11275,11277,11280,11282,11285,11287,11290],{"class":3427,"line":675},[1291,11250,11251],{"class":3431},"server_process 
",[1291,11253,3738],{"class":3435},[1291,11255,11256],{"class":3431}," multiprocessing",[1291,11258,694],{"class":3435},[1291,11260,11261],{"class":3812},"Process",[1291,11263,3816],{"class":3435},[1291,11265,4813],{"class":3819},[1291,11267,3738],{"class":3435},[1291,11269,5580],{"class":3812},[1291,11271,694],{"class":3435},[1291,11273,11274],{"class":3457},"run",[1291,11276,3566],{"class":3435},[1291,11278,11279],{"class":3819}," kwargs",[1291,11281,3738],{"class":3435},[1291,11283,11284],{"class":6356},"dict",[1291,11286,3816],{"class":3435},[1291,11288,11289],{"class":3819},"threaded",[1291,11291,11292],{"class":3435},"=False))\n",[73,11294,11295],{},"Start the process:",[3418,11297,11299],{"className":3420,"code":11298,"language":3422,"meta":23,"style":23},"server_process.start()\n",[3061,11300,11301],{"__ignoreMap":23},[1291,11302,11303,11306,11308,11310],{"class":3427,"line":3428},[1291,11304,11305],{"class":3431},"server_process",[1291,11307,694],{"class":3435},[1291,11309,4868],{"class":3812},[1291,11311,4871],{"class":3435},[73,11313,11314],{},"RAGClient is the client that can query the Pathway Live Data Framework RAG application.",[73,11316,11317],{},"Let's test if the test files are indexed. 
This will list all the indexed documents in our Pathway server.",[3418,11319,11321],{"className":3420,"code":11320,"language":3422,"meta":23,"style":23},"from pathway.xpacks.llm.question_answering import RAGClient\n\npathway_client = RAGClient(pathway_host, pathway_port)\npathway_client.list_documents()\n",[3061,11322,11323,11345,11349,11368],{"__ignoreMap":23},[1291,11324,11325,11327,11329,11331,11333,11335,11337,11339,11341,11343],{"class":3427,"line":3428},[1291,11326,3550],{"class":3475},[1291,11328,3553],{"class":3431},[1291,11330,694],{"class":3435},[1291,11332,3581],{"class":3431},[1291,11334,694],{"class":3435},[1291,11336,3627],{"class":3431},[1291,11338,694],{"class":3435},[1291,11340,3632],{"class":3431},[1291,11342,3476],{"class":3475},[1291,11344,4899],{"class":3431},[1291,11346,11347],{"class":3427,"line":24},[1291,11348,3526],{"emptyLinePlaceholder":35},[1291,11350,11351,11354,11356,11358,11360,11362,11364,11366],{"class":3427,"line":675},[1291,11352,11353],{"class":3431},"pathway_client ",[1291,11355,3738],{"class":3435},[1291,11357,4918],{"class":3812},[1291,11359,3816],{"class":3435},[1291,11361,11169],{"class":3812},[1291,11363,3566],{"class":3435},[1291,11365,11235],{"class":3812},[1291,11367,3827],{"class":3435},[1291,11369,11370,11373,11375,11378],{"class":3427,"line":3542},[1291,11371,11372],{"class":3431},"pathway_client",[1291,11374,694],{"class":3435},[1291,11376,11377],{"class":3812},"list_documents",[1291,11379,4871],{"class":3435},[140,11381,9038],{"id":11382},"evaluate-with-the-dataset",[73,11384,11385],{},"Here, we will iterate over the samples and gather the RAG response and the context documents for each one of the test samples.",[73,11387,11388],{},"Keep in mind that LLM evaluation metrics can fluctuate between runs.  Even minor details like context document order or wording can impact results. 
For more reliable testing, it's best to re-run tests multiple times and average the scores.",[3418,11390,11392],{"className":3420,"code":11391,"language":3422,"meta":23,"style":23},"def predict_test_dataset(\n    dataset: Testset | EvaluationDataset, verbose: bool = True\n) -> EvaluationDataset:\n    predicted_samples: list[SingleTurnSample] = []\n\n    for sample in dataset.samples:\n        single_sample = sample.eval_sample if isinstance(dataset, Testset) else sample\n\n        if verbose:\n            print(f\"Predicting question: {single_sample.user_input}\")\n\n        pw_response: dict = pathway_client.answer(\n            prompt=single_sample.user_input, return_context_docs=True\n        )\n        resp: str = pw_response[\"response\"]\n        context_docs: list[str] = [elem[\"text\"] for elem in pw_response[\"context_docs\"]]\n\n        pred_sample = SingleTurnSample(\n            response=resp, retrieved_contexts=context_docs, **single_sample.to_dict()\n        )\n        predicted_samples.append(pred_sample)\n\n    return EvaluationDataset(samples=predicted_samples)\n",[3061,11393,11394,11404,11433,11443,11464,11468,11488,11526,11530,11539,11567,11571,11592,11612,11617,11641,11694,11698,11710,11743,11747,11763,11767],{"__ignoreMap":23},[1291,11395,11396,11399,11402],{"class":3427,"line":3428},[1291,11397,11398],{"class":7739},"def",[1291,11400,11401],{"class":3812}," predict_test_dataset",[1291,11403,3874],{"class":3435},[1291,11405,11406,11409,11411,11414,11417,11419,11421,11424,11426,11429,11431],{"class":3427,"line":24},[1291,11407,11408],{"class":3819},"    dataset",[1291,11410,4390],{"class":3435},[1291,11412,11413],{"class":3431}," Testset ",[1291,11415,11416],{"class":3435},"|",[1291,11418,10227],{"class":3431},[1291,11420,3566],{"class":3435},[1291,11422,11423],{"class":3819}," verbose",[1291,11425,4390],{"class":3435},[1291,11427,11428],{"class":6356}," 
bool",[1291,11430,3702],{"class":3435},[1291,11432,4853],{"class":3435},[1291,11434,11435,11437,11439,11441],{"class":3427,"line":675},[1291,11436,713],{"class":3435},[1291,11438,9427],{"class":3435},[1291,11440,10227],{"class":3431},[1291,11442,5243],{"class":3435},[1291,11444,11445,11448,11450,11453,11455,11458,11460,11462],{"class":3427,"line":3542},[1291,11446,11447],{"class":3431},"    predicted_samples",[1291,11449,4390],{"class":3435},[1291,11451,11452],{"class":3431}," list",[1291,11454,3688],{"class":3435},[1291,11456,11457],{"class":3431},"SingleTurnSample",[1291,11459,3699],{"class":3435},[1291,11461,3702],{"class":3435},[1291,11463,6554],{"class":3435},[1291,11465,11466],{"class":3427,"line":3547},[1291,11467,3526],{"emptyLinePlaceholder":35},[1291,11469,11470,11473,11476,11478,11481,11483,11486],{"class":3427,"line":3572},[1291,11471,11472],{"class":3475},"    for",[1291,11474,11475],{"class":3431}," sample ",[1291,11477,9566],{"class":3475},[1291,11479,11480],{"class":3431}," dataset",[1291,11482,694],{"class":3435},[1291,11484,11485],{"class":3457},"samples",[1291,11487,5243],{"class":3435},[1291,11489,11490,11493,11495,11498,11500,11503,11506,11509,11511,11513,11515,11518,11520,11523],{"class":3427,"line":3614},[1291,11491,11492],{"class":3431},"        single_sample ",[1291,11494,3738],{"class":3435},[1291,11496,11497],{"class":3431}," sample",[1291,11499,694],{"class":3435},[1291,11501,11502],{"class":3457},"eval_sample",[1291,11504,11505],{"class":3475}," if",[1291,11507,11508],{"class":3812}," isinstance",[1291,11510,3816],{"class":3435},[1291,11512,9256],{"class":3812},[1291,11514,3566],{"class":3435},[1291,11516,11517],{"class":3812}," Testset",[1291,11519,713],{"class":3435},[1291,11521,11522],{"class":3475}," else",[1291,11524,11525],{"class":3431}," 
sample\n",[1291,11527,11528],{"class":3427,"line":3640},[1291,11529,3526],{"emptyLinePlaceholder":35},[1291,11531,11532,11535,11537],{"class":3427,"line":3665},[1291,11533,11534],{"class":3475},"        if",[1291,11536,11423],{"class":3431},[1291,11538,5243],{"class":3435},[1291,11540,11541,11544,11546,11548,11551,11553,11556,11558,11561,11563,11565],{"class":3427,"line":3670},[1291,11542,11543],{"class":3812},"            print",[1291,11545,3816],{"class":3435},[1291,11547,9643],{"class":7739},[1291,11549,11550],{"class":3439},"\"Predicting question: ",[1291,11552,8770],{"class":3451},[1291,11554,11555],{"class":3812},"single_sample",[1291,11557,694],{"class":3435},[1291,11559,11560],{"class":3457},"user_input",[1291,11562,9671],{"class":3451},[1291,11564,3691],{"class":3439},[1291,11566,3827],{"class":3435},[1291,11568,11569],{"class":3427,"line":3677},[1291,11570,3526],{"emptyLinePlaceholder":35},[1291,11572,11573,11576,11578,11581,11583,11586,11588,11590],{"class":3427,"line":3877},[1291,11574,11575],{"class":3431},"        pw_response",[1291,11577,4390],{"class":3435},[1291,11579,11580],{"class":6356}," dict",[1291,11582,3702],{"class":3435},[1291,11584,11585],{"class":3431}," pathway_client",[1291,11587,694],{"class":3435},[1291,11589,4970],{"class":3812},[1291,11591,3874],{"class":3435},[1291,11593,11594,11597,11599,11601,11603,11605,11607,11610],{"class":3427,"line":3916},[1291,11595,11596],{"class":3819},"            prompt",[1291,11598,3738],{"class":3435},[1291,11600,11555],{"class":3812},[1291,11602,694],{"class":3435},[1291,11604,11560],{"class":3457},[1291,11606,3566],{"class":3435},[1291,11608,11609],{"class":3819}," return_context_docs",[1291,11611,6981],{"class":3435},[1291,11613,11614],{"class":3427,"line":4519},[1291,11615,11616],{"class":3435},"        )\n",[1291,11618,11619,11622,11624,11626,11628,11631,11633,11635,11637,11639],{"class":3427,"line":6038},[1291,11620,11621],{"class":3431},"        
resp",[1291,11623,4390],{"class":3435},[1291,11625,9387],{"class":6356},[1291,11627,3702],{"class":3435},[1291,11629,11630],{"class":3431}," pw_response",[1291,11632,3688],{"class":3435},[1291,11634,3691],{"class":3435},[1291,11636,4991],{"class":3439},[1291,11638,3691],{"class":3435},[1291,11640,5267],{"class":3435},[1291,11642,11643,11646,11648,11650,11652,11654,11656,11658,11660,11663,11665,11667,11669,11671,11673,11675,11678,11680,11682,11684,11686,11689,11691],{"class":3427,"line":6043},[1291,11644,11645],{"class":3431},"        context_docs",[1291,11647,4390],{"class":3435},[1291,11649,11452],{"class":3431},[1291,11651,3688],{"class":3435},[1291,11653,7171],{"class":6356},[1291,11655,3699],{"class":3435},[1291,11657,3702],{"class":3435},[1291,11659,4145],{"class":3435},[1291,11661,11662],{"class":3431},"elem",[1291,11664,3688],{"class":3435},[1291,11666,3691],{"class":3435},[1291,11668,4999],{"class":3439},[1291,11670,3691],{"class":3435},[1291,11672,3699],{"class":3435},[1291,11674,9560],{"class":3475},[1291,11676,11677],{"class":3431}," elem ",[1291,11679,9566],{"class":3475},[1291,11681,11630],{"class":3431},[1291,11683,3688],{"class":3435},[1291,11685,3691],{"class":3435},[1291,11687,11688],{"class":3439},"context_docs",[1291,11690,3691],{"class":3435},[1291,11692,11693],{"class":3435},"]]\n",[1291,11695,11696],{"class":3427,"line":6066},[1291,11697,3526],{"emptyLinePlaceholder":35},[1291,11699,11700,11703,11705,11708],{"class":3427,"line":6078},[1291,11701,11702],{"class":3431},"        pred_sample ",[1291,11704,3738],{"class":3435},[1291,11706,11707],{"class":3812}," SingleTurnSample",[1291,11709,3874],{"class":3435},[1291,11711,11712,11715,11717,11720,11722,11725,11727,11729,11731,11734,11736,11738,11741],{"class":3427,"line":6089},[1291,11713,11714],{"class":3819},"            response",[1291,11716,3738],{"class":3435},[1291,11718,11719],{"class":3812},"resp",[1291,11721,3566],{"class":3435},[1291,11723,11724],{"class":3819}," 
retrieved_contexts",[1291,11726,3738],{"class":3435},[1291,11728,11688],{"class":3812},[1291,11730,3566],{"class":3435},[1291,11732,11733],{"class":3435}," **",[1291,11735,11555],{"class":3812},[1291,11737,694],{"class":3435},[1291,11739,11740],{"class":3812},"to_dict",[1291,11742,4871],{"class":3435},[1291,11744,11745],{"class":3427,"line":6124},[1291,11746,11616],{"class":3435},[1291,11748,11749,11752,11754,11756,11758,11761],{"class":3427,"line":6133},[1291,11750,11751],{"class":3431},"        predicted_samples",[1291,11753,694],{"class":3435},[1291,11755,6564],{"class":3812},[1291,11757,3816],{"class":3435},[1291,11759,11760],{"class":3812},"pred_sample",[1291,11762,3827],{"class":3435},[1291,11764,11765],{"class":3427,"line":6141},[1291,11766,3526],{"emptyLinePlaceholder":35},[1291,11768,11769,11772,11774,11776,11778,11780,11783],{"class":3427,"line":6151},[1291,11770,11771],{"class":3475},"    return",[1291,11773,10227],{"class":3812},[1291,11775,3816],{"class":3435},[1291,11777,11485],{"class":3819},[1291,11779,3738],{"class":3435},[1291,11781,11782],{"class":3812},"predicted_samples",[1291,11784,3827],{"class":3435},[3418,11786,11788],{"className":3420,"code":11787,"language":3422,"meta":23,"style":23},"predicted_dataset = predict_test_dataset(dataset)\n",[3061,11789,11790],{"__ignoreMap":23},[1291,11791,11792,11795,11797,11799,11801,11803],{"class":3427,"line":3428},[1291,11793,11794],{"class":3431},"predicted_dataset 
",[1291,11796,3738],{"class":3435},[1291,11798,11401],{"class":3812},[1291,11800,3816],{"class":3435},[1291,11802,9256],{"class":3812},[1291,11804,3827],{"class":3435},[3418,11806,11808],{"className":3420,"code":11807,"language":3422,"meta":23,"style":23},"predicted_dataset.to_pandas().head()\n",[3061,11809,11810],{"__ignoreMap":23},[1291,11811,11812,11815,11817,11819,11821,11823],{"class":3427,"line":3428},[1291,11813,11814],{"class":3431},"predicted_dataset",[1291,11816,694],{"class":3435},[1291,11818,10338],{"class":3812},[1291,11820,10341],{"class":3435},[1291,11822,10344],{"class":3812},[1291,11824,4871],{"class":3435},[3418,11826,11828],{"className":3420,"code":11827,"language":3422,"meta":23,"style":23},"from ragas import evaluate\n\n\nfrom ragas.metrics import (\n    AnswerCorrectness,\n    Faithfulness,\n    context_recall,\n    context_precision,\n)\n\n",[3061,11829,11830,11841,11845,11849,11864,11871,11878,11885,11892],{"__ignoreMap":23},[1291,11831,11832,11834,11836,11838],{"class":3427,"line":3428},[1291,11833,3550],{"class":3475},[1291,11835,10222],{"class":3431},[1291,11837,3476],{"class":3475},[1291,11839,11840],{"class":3431}," evaluate\n",[1291,11842,11843],{"class":3427,"line":24},[1291,11844,3526],{"emptyLinePlaceholder":35},[1291,11846,11847],{"class":3427,"line":675},[1291,11848,3526],{"emptyLinePlaceholder":35},[1291,11850,11851,11853,11855,11857,11860,11862],{"class":3427,"line":3542},[1291,11852,3550],{"class":3475},[1291,11854,9964],{"class":3431},[1291,11856,694],{"class":3435},[1291,11858,11859],{"class":3431},"metrics ",[1291,11861,3476],{"class":3475},[1291,11863,6086],{"class":3435},[1291,11865,11866,11869],{"class":3427,"line":3547},[1291,11867,11868],{"class":3431},"    AnswerCorrectness",[1291,11870,4107],{"class":3435},[1291,11872,11873,11876],{"class":3427,"line":3572},[1291,11874,11875],{"class":3431},"    
Faithfulness",[1291,11877,4107],{"class":3435},[1291,11879,11880,11883],{"class":3427,"line":3614},[1291,11881,11882],{"class":3431},"    context_recall",[1291,11884,4107],{"class":3435},[1291,11886,11887,11890],{"class":3427,"line":3640},[1291,11888,11889],{"class":3431},"    context_precision",[1291,11891,4107],{"class":3435},[1291,11893,11894],{"class":3427,"line":3665},[1291,11895,3827],{"class":3435},[73,11897,11898],{},"Calculate the evaluation metrics with our selected metrics.",[73,11900,11901],{},"We introduced few modifications on top of the default RAGAS settings, namely:",[145,11903,11904,11907,11914],{},[148,11905,11906],{},"We completely ignored semantic similarity in the answer correctness, we found that it usually gives \"false positives\" and unnecessarily rewards bad predictions*.",[148,11908,11909,11910,11913],{},"We modified ",[3061,11911,11912],{},"answer_correctness_metric","'s prompt to be more forgiving and not look for the exact same words.",[148,11915,11916,11917,11920],{},"We increased ",[3061,11918,11919],{},"beta"," parameter of the correctness to favor the recall rather than precision. We reward if LLM has more of relevant documents in the context. This is because LLM can choose to ignore irrelevant documents (False positive in context) which diminishes the importance of the precision.",[9194,11922,11923],{},[73,11924,11925],{},"* This issue stems from the limitations of commonly used encoder models, such as those generating sentence embeddings. 
These models are primarily trained on tasks like document similarity and natural language inference, making them effective at identifying semantically related text but not at evaluating factual accuracy.",[3418,11927,11929],{"className":3420,"code":11928,"language":3422,"meta":23,"style":23},"def run_ragas_evaluations(dataset: EvaluationDataset):\n\n    evaluator_llm = LangchainLLMWrapper(\n        ChatOpenAI(model=\"gpt-4o-mini\", temperature=0.0)\n    )\n\n    answer_correctness_metric = AnswerCorrectness(\n        llm=evaluator_llm,\n        weights=[\n            1.0,\n            0.0,\n        ],  # ignore the semantic similarity, it is often misleading. Prone to giving hi score to false labels.\n        max_retries=3,\n        beta=1.5,  # favor the recall a bit more\n    )\n\n    # adjust the evaluator LLM prompt to be more forgiving\n\n    correctness_prompt = answer_correctness_metric.get_prompts()[\"correctness_prompt\"]\n\n    correctness_prompt.instruction += \"\"\" Answer may be less or more verbose than the ground truth, that is fine.\n    If the ground truth is 'Yes' and answer is 'Yes, [... 
some details]', consider it as true.\"\"\"\n    answer_correctness_metric.set_prompts(**{\"correctness_prompt\": correctness_prompt})\n\n    metrics: list = [\n        answer_correctness_metric,\n        Faithfulness(llm=evaluator_llm),\n        context_recall,\n        context_precision,\n    ]\n    results = evaluate(dataset=dataset, metrics=metrics)\n    return results\n",[3061,11930,11931,11949,11953,11964,11992,11997,12001,12013,12024,12032,12039,12046,12054,12066,12081,12085,12089,12094,12098,12125,12129,12147,12154,12181,12185,12198,12205,12220,12227,12234,12239,12269],{"__ignoreMap":23},[1291,11932,11933,11935,11938,11940,11942,11944,11946],{"class":3427,"line":3428},[1291,11934,11398],{"class":7739},[1291,11936,11937],{"class":3812}," run_ragas_evaluations",[1291,11939,3816],{"class":3435},[1291,11941,9256],{"class":3819},[1291,11943,4390],{"class":3435},[1291,11945,10227],{"class":3431},[1291,11947,11948],{"class":3435},"):\n",[1291,11950,11951],{"class":3427,"line":24},[1291,11952,3526],{"emptyLinePlaceholder":35},[1291,11954,11955,11958,11960,11962],{"class":3427,"line":675},[1291,11956,11957],{"class":3431},"    evaluator_llm ",[1291,11959,3738],{"class":3435},[1291,11961,10039],{"class":3812},[1291,11963,3874],{"class":3435},[1291,11965,11966,11969,11971,11973,11975,11977,11980,11982,11984,11986,11988,11990],{"class":3427,"line":3542},[1291,11967,11968],{"class":3812},"        ChatOpenAI",[1291,11970,3816],{"class":3435},[1291,11972,10049],{"class":3819},[1291,11974,3738],{"class":3435},[1291,11976,3691],{"class":3435},[1291,11978,11979],{"class":3439},"gpt-4o-mini",[1291,11981,3691],{"class":3435},[1291,11983,3566],{"class":3435},[1291,11985,10062],{"class":3819},[1291,11987,3738],{"class":3435},[1291,11989,4252],{"class":3451},[1291,11991,3827],{"class":3435},[1291,11993,11994],{"class":3427,"line":3547},[1291,11995,11996],{"class":3435},"    
)\n",[1291,11998,11999],{"class":3427,"line":3572},[1291,12000,3526],{"emptyLinePlaceholder":35},[1291,12002,12003,12006,12008,12011],{"class":3427,"line":3614},[1291,12004,12005],{"class":3431},"    answer_correctness_metric ",[1291,12007,3738],{"class":3435},[1291,12009,12010],{"class":3812}," AnswerCorrectness",[1291,12012,3874],{"class":3435},[1291,12014,12015,12017,12019,12022],{"class":3427,"line":3640},[1291,12016,4661],{"class":3819},[1291,12018,3738],{"class":3435},[1291,12020,12021],{"class":3812},"evaluator_llm",[1291,12023,4107],{"class":3435},[1291,12025,12026,12029],{"class":3427,"line":3665},[1291,12027,12028],{"class":3819},"        weights",[1291,12030,12031],{"class":3435},"=[\n",[1291,12033,12034,12037],{"class":3427,"line":3670},[1291,12035,12036],{"class":3451},"            1.0",[1291,12038,4107],{"class":3435},[1291,12040,12041,12044],{"class":3427,"line":3677},[1291,12042,12043],{"class":3451},"            0.0",[1291,12045,4107],{"class":3435},[1291,12047,12048,12051],{"class":3427,"line":3877},[1291,12049,12050],{"class":3435},"        ],",[1291,12052,12053],{"class":3673},"  # ignore the semantic similarity, it is often misleading. 
Prone to giving hi score to false labels.\n",[1291,12055,12056,12059,12061,12064],{"class":3427,"line":3916},[1291,12057,12058],{"class":3819},"        max_retries",[1291,12060,3738],{"class":3435},[1291,12062,12063],{"class":3451},"3",[1291,12065,4107],{"class":3435},[1291,12067,12068,12071,12073,12076,12078],{"class":3427,"line":4519},[1291,12069,12070],{"class":3819},"        beta",[1291,12072,3738],{"class":3435},[1291,12074,12075],{"class":3451},"1.5",[1291,12077,3566],{"class":3435},[1291,12079,12080],{"class":3673},"  # favor the recall a bit more\n",[1291,12082,12083],{"class":3427,"line":6038},[1291,12084,11996],{"class":3435},[1291,12086,12087],{"class":3427,"line":6043},[1291,12088,3526],{"emptyLinePlaceholder":35},[1291,12090,12091],{"class":3427,"line":6066},[1291,12092,12093],{"class":3673},"    # adjust the evaluator LLM prompt to be more forgiving\n",[1291,12095,12096],{"class":3427,"line":6078},[1291,12097,3526],{"emptyLinePlaceholder":35},[1291,12099,12100,12103,12105,12108,12110,12113,12116,12118,12121,12123],{"class":3427,"line":6089},[1291,12101,12102],{"class":3431},"    correctness_prompt ",[1291,12104,3738],{"class":3435},[1291,12106,12107],{"class":3431}," answer_correctness_metric",[1291,12109,694],{"class":3435},[1291,12111,12112],{"class":3812},"get_prompts",[1291,12114,12115],{"class":3435},"()[",[1291,12117,3691],{"class":3435},[1291,12119,12120],{"class":3439},"correctness_prompt",[1291,12122,3691],{"class":3435},[1291,12124,5267],{"class":3435},[1291,12126,12127],{"class":3427,"line":6124},[1291,12128,3526],{"emptyLinePlaceholder":35},[1291,12130,12131,12134,12136,12139,12142,12144],{"class":3427,"line":6133},[1291,12132,12133],{"class":3431},"    correctness_prompt",[1291,12135,694],{"class":3435},[1291,12137,12138],{"class":3457},"instruction",[1291,12140,12141],{"class":3435}," +=",[1291,12143,11015],{"class":3435},[1291,12145,12146],{"class":3439}," Answer may be less or more verbose than the ground truth, that is 
fine.\n",[1291,12148,12149,12152],{"class":3427,"line":6141},[1291,12150,12151],{"class":3439},"    If the ground truth is 'Yes' and answer is 'Yes, [... some details]', consider it as true.",[1291,12153,6035],{"class":3435},[1291,12155,12156,12159,12161,12164,12167,12169,12171,12173,12175,12178],{"class":3427,"line":6151},[1291,12157,12158],{"class":3431},"    answer_correctness_metric",[1291,12160,694],{"class":3435},[1291,12162,12163],{"class":3812},"set_prompts",[1291,12165,12166],{"class":3435},"(**{",[1291,12168,3691],{"class":3435},[1291,12170,12120],{"class":3439},[1291,12172,3691],{"class":3435},[1291,12174,4390],{"class":3435},[1291,12176,12177],{"class":3812}," correctness_prompt",[1291,12179,12180],{"class":3435},"})\n",[1291,12182,12183],{"class":3427,"line":6923},[1291,12184,3526],{"emptyLinePlaceholder":35},[1291,12186,12187,12190,12192,12194,12196],{"class":3427,"line":6928},[1291,12188,12189],{"class":3431},"    metrics",[1291,12191,4390],{"class":3435},[1291,12193,11452],{"class":6356},[1291,12195,3702],{"class":3435},[1291,12197,6785],{"class":3435},[1291,12199,12200,12203],{"class":3427,"line":6934},[1291,12201,12202],{"class":3431},"        answer_correctness_metric",[1291,12204,4107],{"class":3435},[1291,12206,12207,12210,12212,12214,12216,12218],{"class":3427,"line":6940},[1291,12208,12209],{"class":3812},"        Faithfulness",[1291,12211,3816],{"class":3435},[1291,12213,3627],{"class":3819},[1291,12215,3738],{"class":3435},[1291,12217,12021],{"class":3812},[1291,12219,4242],{"class":3435},[1291,12221,12222,12225],{"class":3427,"line":6952},[1291,12223,12224],{"class":3431},"        context_recall",[1291,12226,4107],{"class":3435},[1291,12228,12229,12232],{"class":3427,"line":6984},[1291,12230,12231],{"class":3431},"        context_precision",[1291,12233,4107],{"class":3435},[1291,12235,12236],{"class":3427,"line":7996},[1291,12237,12238],{"class":3435},"    
]\n",[1291,12240,12241,12244,12246,12249,12251,12253,12255,12257,12259,12262,12264,12267],{"class":3427,"line":8007},[1291,12242,12243],{"class":3431},"    results ",[1291,12245,3738],{"class":3435},[1291,12247,12248],{"class":3812}," evaluate",[1291,12250,3816],{"class":3435},[1291,12252,9256],{"class":3819},[1291,12254,3738],{"class":3435},[1291,12256,9256],{"class":3812},[1291,12258,3566],{"class":3435},[1291,12260,12261],{"class":3819}," metrics",[1291,12263,3738],{"class":3435},[1291,12265,12266],{"class":3812},"metrics",[1291,12268,3827],{"class":3435},[1291,12270,12271,12273],{"class":3427,"line":8018},[1291,12272,11771],{"class":3475},[1291,12274,12275],{"class":3431}," results\n",[3418,12277,12279],{"className":3420,"code":12278,"language":3422,"meta":23,"style":23},"ragas_evals_dataset = run_ragas_evaluations(predicted_dataset)\n",[3061,12280,12281],{"__ignoreMap":23},[1291,12282,12283,12286,12288,12290,12292,12294],{"class":3427,"line":3428},[1291,12284,12285],{"class":3431},"ragas_evals_dataset ",[1291,12287,3738],{"class":3435},[1291,12289,11937],{"class":3812},[1291,12291,3816],{"class":3435},[1291,12293,11814],{"class":3812},[1291,12295,3827],{"class":3435},[3418,12297,12299],{"className":3420,"code":12298,"language":3422,"meta":23,"style":23},"ragas_evals_dataset\n",[3061,12300,12301],{"__ignoreMap":23},[1291,12302,12303],{"class":3427,"line":3428},[1291,12304,12298],{"class":3431},[3418,12306,12309],{"className":12307,"code":12308,"language":4999},[4997],"{'answer_correctness': 0.5249, 'faithfulness': 0.6275, 'context_recall': 0.9353, 'context_precision': 0.7761}\n",[3061,12310,12308],{"__ignoreMap":23},[73,12312,12313],{},"We will learn how to improve these below.",[73,12315,12316,12317,12320],{},"Let's inspect the eval metrics based on the questions. We see that in some cases, LLM had context recall of ",[3061,12318,12319],{},"1.0"," but failed to answer the question correctly. 
This may be indicator of poor performance from the LLM, or irrelevant context (if precision is low) that caused LLM to be thrown off.",[3418,12322,12324],{"className":3420,"code":12323,"language":3422,"meta":23,"style":23},"pd.DataFrame(ragas_evals_dataset.scores)\n",[3061,12325,12326],{"__ignoreMap":23},[1291,12327,12328,12331,12333,12336,12338,12341,12343,12346],{"class":3427,"line":3428},[1291,12329,12330],{"class":3431},"pd",[1291,12332,694],{"class":3435},[1291,12334,12335],{"class":3812},"DataFrame",[1291,12337,3816],{"class":3435},[1291,12339,12340],{"class":3812},"ragas_evals_dataset",[1291,12342,694],{"class":3435},[1291,12344,12345],{"class":3457},"scores",[1291,12347,3827],{"class":3435},[73,12349,12350],{},"Terminate the app;",[3418,12352,12354],{"className":3420,"code":12353,"language":3422,"meta":23,"style":23},"server_process.terminate()\nserver_process.join()\n",[3061,12355,12356,12367],{"__ignoreMap":23},[1291,12357,12358,12360,12362,12365],{"class":3427,"line":3428},[1291,12359,11305],{"class":3431},[1291,12361,694],{"class":3435},[1291,12363,12364],{"class":3812},"terminate",[1291,12366,4871],{"class":3435},[1291,12368,12369,12371,12373,12375],{"class":3427,"line":24},[1291,12370,11305],{"class":3431},[1291,12372,694],{"class":3435},[1291,12374,9544],{"class":3812},[1291,12376,4871],{"class":3435},[73,12378,12379],{},"Clear the previous app from the Pathway engine",[3418,12381,12383],{"className":3420,"code":12382,"language":3422,"meta":23,"style":23},"def clear_pathway_graph() -> None:\n    from pathway.internals.parse_graph import G\n\n    G.clear()\n\n\nclear_pathway_graph()\n",[3061,12384,12385,12399,12421,12425,12437,12441,12445],{"__ignoreMap":23},[1291,12386,12387,12389,12392,12395,12397],{"class":3427,"line":3428},[1291,12388,11398],{"class":7739},[1291,12390,12391],{"class":3812}," 
clear_pathway_graph",[1291,12393,12394],{"class":3435},"()",[1291,12396,9427],{"class":3435},[1291,12398,9430],{"class":3435},[1291,12400,12401,12404,12406,12408,12411,12413,12416,12418],{"class":3427,"line":24},[1291,12402,12403],{"class":3475},"    from",[1291,12405,3553],{"class":3431},[1291,12407,694],{"class":3435},[1291,12409,12410],{"class":3431},"internals",[1291,12412,694],{"class":3435},[1291,12414,12415],{"class":3431},"parse_graph ",[1291,12417,3476],{"class":3475},[1291,12419,12420],{"class":3431}," G\n",[1291,12422,12423],{"class":3427,"line":675},[1291,12424,3526],{"emptyLinePlaceholder":35},[1291,12426,12427,12430,12432,12435],{"class":3427,"line":3542},[1291,12428,12429],{"class":3431},"    G",[1291,12431,694],{"class":3435},[1291,12433,12434],{"class":3812},"clear",[1291,12436,4871],{"class":3435},[1291,12438,12439],{"class":3427,"line":3547},[1291,12440,3526],{"emptyLinePlaceholder":35},[1291,12442,12443],{"class":3427,"line":3572},[1291,12444,3526],{"emptyLinePlaceholder":35},[1291,12446,12447,12450],{"class":3427,"line":3614},[1291,12448,12449],{"class":3812},"clear_pathway_graph",[1291,12451,4871],{"class":3435},[140,12453,12455],{"id":12454},"improving-the-accuracy","Improving the Accuracy",[73,12457,12458],{},"A RAG application's performance is impacted by many variables, we can gather them under two broad categories that are linked together:",[145,12460,12461,12464],{},[148,12462,12463],{},"Retrieval",[148,12465,12466],{},"Generation",[73,12468,12469],{},"Retrieval performance mainly consists of quality of the input data",[3189,12471,9052],{"id":12472},"hybrid-index",[73,12474,12475],{},"Hybrid index combines semantic search and keyword based BM25 search.",[73,12477,10378,12478,12481],{},[77,12479,10485],{"href":10483,"rel":12480},[81]," lets you combine different indexes to build an hybrid index:",[665,12483,12484,12490],{},[148,12485,12486,12489],{},[77,12487,10477],{"href":10475,"rel":12488},[81]," (via TantivyBM25Factory) → Keyword 
based BM25 search.",[148,12491,12492,12497],{},[77,12493,12496],{"href":12494,"rel":12495},"https:\u002F\u002Fpathway.com\u002Fdevelopers\u002Fapi-docs\u002Findexing#pathway.stdlib.indexing.BruteForceKnn",[81],"BruteForceKnn"," → Vector-based semantic search",[3418,12499,12501],{"className":3420,"code":12500,"language":3422,"meta":23,"style":23},"folder = pw.io.fs.read(\n    path=INPUT_FOLDER,\n    format=\"binary\",\n    with_metadata=True,\n)\n\nsources = [folder]\n\nparser = parsers.UnstructuredParser()\n\ntext_splitter = splitters.TokenCountSplitter(min_tokens=150, max_tokens=450)\n\nembedder = embedders.OpenAIEmbedder(\n    cache_strategy=DiskCache(), retry_strategy=pw.udfs.ExponentialBackoffRetryStrategy()\n)\n\nhybrid_index = HybridIndexFactory(\n    [\n        TantivyBM25Factory(),\n        BruteForceKnnFactory(embedder=embedder),\n    ]\n)\n\nllm = llms.OpenAIChat(model=\"gpt-4o\", cache_strategy=DiskCache())\n\ndocument_store = DocumentStore(\n    docs=sources, parser=parser, splitter=text_splitter, retriever_factory=hybrid_index\n)\n\nprompt_template: str = \"\"\"You are an assistant for question-answering tasks. \\\nUse the following pieces of retrieved context to answer the question. 
\\\nIf you don't know the answer, just say that you don't know.\n  Question: {query}\n\n  Context: {context}\n\n  Answer:\"\"\"\n\nrag_app = BaseRAGQuestionAnswerer(\n    llm=llm,\n    indexer=document_store,\n    prompt_template=prompt_template,\n)\n\n\nserver = QASummaryRestServer(pathway_host, pathway_port, rag_app)\n\nserver_process = multiprocessing.Process(target=server.run, kwargs=dict(threaded=False))\n",[3061,12502,12503,12525,12535,12549,12555,12559,12563,12575,12579,12593,12597,12627,12631,12645,12671,12675,12679,12691,12696,12704,12719,12723,12727,12731,12765,12769,12779,12812,12816,12820,12836,12842,12846,12852,12856,12862,12866,12872,12876,12886,12896,12906,12916,12920,12924,12928,12950,12954],{"__ignoreMap":23},[1291,12504,12505,12507,12509,12511,12513,12515,12517,12519,12521,12523],{"class":3427,"line":3428},[1291,12506,4068],{"class":3431},[1291,12508,3738],{"class":3435},[1291,12510,4073],{"class":3431},[1291,12512,694],{"class":3435},[1291,12514,4078],{"class":3457},[1291,12516,694],{"class":3435},[1291,12518,4083],{"class":3457},[1291,12520,694],{"class":3435},[1291,12522,4088],{"class":3812},[1291,12524,3874],{"class":3435},[1291,12526,12527,12529,12531,12533],{"class":3427,"line":24},[1291,12528,4095],{"class":3819},[1291,12530,3738],{"class":3435},[1291,12532,9745],{"class":3812},[1291,12534,4107],{"class":3435},[1291,12536,12537,12539,12541,12543,12545,12547],{"class":3427,"line":675},[1291,12538,4112],{"class":3819},[1291,12540,3738],{"class":3435},[1291,12542,3691],{"class":3435},[1291,12544,4119],{"class":3439},[1291,12546,3691],{"class":3435},[1291,12548,4107],{"class":3435},[1291,12550,12551,12553],{"class":3427,"line":3542},[1291,12552,4128],{"class":3819},[1291,12554,4131],{"class":3435},[1291,12556,12557],{"class":3427,"line":3547},[1291,12558,3827],{"class":3435},[1291,12560,12561],{"class":3427,"line":3572},[1291,12562,3526],{"emptyLinePlaceholder":35},[1291,12564,12565,12567,12569,12571,12573],{"class":3427,"line":3614},[1291,12566
,4140],{"class":3431},[1291,12568,3738],{"class":3435},[1291,12570,4145],{"class":3435},[1291,12572,4148],{"class":3431},[1291,12574,5267],{"class":3435},[1291,12576,12577],{"class":3427,"line":3640},[1291,12578,3526],{"emptyLinePlaceholder":35},[1291,12580,12581,12583,12585,12587,12589,12591],{"class":3427,"line":3665},[1291,12582,4522],{"class":3431},[1291,12584,3738],{"class":3435},[1291,12586,3601],{"class":3431},[1291,12588,694],{"class":3435},[1291,12590,9184],{"class":3812},[1291,12592,4871],{"class":3435},[1291,12594,12595],{"class":3427,"line":3670},[1291,12596,3526],{"emptyLinePlaceholder":35},[1291,12598,12599,12601,12603,12605,12607,12609,12611,12613,12615,12617,12619,12621,12623,12625],{"class":3427,"line":3677},[1291,12600,5370],{"class":3431},[1291,12602,3738],{"class":3435},[1291,12604,10800],{"class":3431},[1291,12606,694],{"class":3435},[1291,12608,10805],{"class":3812},[1291,12610,3816],{"class":3435},[1291,12612,10810],{"class":3819},[1291,12614,3738],{"class":3435},[1291,12616,6802],{"class":3451},[1291,12618,3566],{"class":3435},[1291,12620,10819],{"class":3819},[1291,12622,3738],{"class":3435},[1291,12624,10824],{"class":3451},[1291,12626,3827],{"class":3435},[1291,12628,12629],{"class":3427,"line":3877},[1291,12630,3526],{"emptyLinePlaceholder":35},[1291,12632,12633,12635,12637,12639,12641,12643],{"class":3427,"line":3916},[1291,12634,4292],{"class":3431},[1291,12636,3738],{"class":3435},[1291,12638,3591],{"class":3431},[1291,12640,694],{"class":3435},[1291,12642,10843],{"class":3812},[1291,12644,3874],{"class":3435},[1291,12646,12647,12649,12651,12653,12655,12657,12659,12661,12663,12665,12667,12669],{"class":3427,"line":4519},[1291,12648,10850],{"class":3819},[1291,12650,3738],{"class":3435},[1291,12652,10855],{"class":3812},[1291,12654,10858],{"class":3435},[1291,12656,10861],{"class":3819},[1291,12658,3738],{"class":3435},[1291,12660,3841],{"class":3812},[1291,12662,694],{"class":3435},[1291,12664,10870],{"class":3457},[1291,12666,694],{"c
lass":3435},[1291,12668,4219],{"class":3812},[1291,12670,4871],{"class":3435},[1291,12672,12673],{"class":3427,"line":6038},[1291,12674,3827],{"class":3435},[1291,12676,12677],{"class":3427,"line":6043},[1291,12678,3526],{"emptyLinePlaceholder":35},[1291,12680,12681,12684,12686,12689],{"class":3427,"line":6066},[1291,12682,12683],{"class":3431},"hybrid_index ",[1291,12685,3738],{"class":3435},[1291,12687,12688],{"class":3812}," HybridIndexFactory",[1291,12690,3874],{"class":3435},[1291,12692,12693],{"class":3427,"line":6078},[1291,12694,12695],{"class":3435},"    [\n",[1291,12697,12698,12701],{"class":3427,"line":6089},[1291,12699,12700],{"class":3812},"        TantivyBM25Factory",[1291,12702,12703],{"class":3435},"(),\n",[1291,12705,12706,12709,12711,12713,12715,12717],{"class":3427,"line":6124},[1291,12707,12708],{"class":3812},"        BruteForceKnnFactory",[1291,12710,3816],{"class":3435},[1291,12712,4597],{"class":3819},[1291,12714,3738],{"class":3435},[1291,12716,4597],{"class":3812},[1291,12718,4242],{"class":3435},[1291,12720,12721],{"class":3427,"line":6133},[1291,12722,12238],{"class":3435},[1291,12724,12725],{"class":3427,"line":6141},[1291,12726,3827],{"class":3435},[1291,12728,12729],{"class":3427,"line":6151},[1291,12730,3526],{"emptyLinePlaceholder":35},[1291,12732,12733,12735,12737,12739,12741,12743,12745,12747,12749,12751,12753,12755,12757,12759,12761,12763],{"class":3427,"line":6923},[1291,12734,3586],{"class":3431},[1291,12736,3738],{"class":3435},[1291,12738,3596],{"class":3431},[1291,12740,694],{"class":3435},[1291,12742,10920],{"class":3812},[1291,12744,3816],{"class":3435},[1291,12746,10049],{"class":3819},[1291,12748,3738],{"class":3435},[1291,12750,3691],{"class":3435},[1291,12752,7849],{"class":3439},[1291,12754,3691],{"class":3435},[1291,12756,3566],{"class":3435},[1291,12758,10937],{"class":3819},[1291,12760,3738],{"class":3435},[1291,12762,10855],{"class":3812},[1291,12764,6237],{"class":3435},[1291,12766,12767],{"class":3427,"line":6928
},[1291,12768,3526],{"emptyLinePlaceholder":35},[1291,12770,12771,12773,12775,12777],{"class":3427,"line":6934},[1291,12772,10614],{"class":3431},[1291,12774,3738],{"class":3435},[1291,12776,10956],{"class":3812},[1291,12778,3874],{"class":3435},[1291,12780,12781,12783,12785,12787,12789,12791,12793,12795,12797,12799,12801,12803,12805,12807,12809],{"class":3427,"line":6940},[1291,12782,10164],{"class":3819},[1291,12784,3738],{"class":3435},[1291,12786,4585],{"class":3812},[1291,12788,3566],{"class":3435},[1291,12790,9401],{"class":3819},[1291,12792,3738],{"class":3435},[1291,12794,4621],{"class":3812},[1291,12796,3566],{"class":3435},[1291,12798,5566],{"class":3819},[1291,12800,3738],{"class":3435},[1291,12802,10983],{"class":3812},[1291,12804,3566],{"class":3435},[1291,12806,10988],{"class":3819},[1291,12808,3738],{"class":3435},[1291,12810,12811],{"class":3812},"hybrid_index\n",[1291,12813,12814],{"class":3427,"line":6952},[1291,12815,3827],{"class":3435},[1291,12817,12818],{"class":3427,"line":6984},[1291,12819,3526],{"emptyLinePlaceholder":35},[1291,12821,12822,12824,12826,12828,12830,12832,12834],{"class":3427,"line":7996},[1291,12823,11006],{"class":3431},[1291,12825,4390],{"class":3435},[1291,12827,9387],{"class":6356},[1291,12829,3702],{"class":3435},[1291,12831,11015],{"class":3435},[1291,12833,11018],{"class":3439},[1291,12835,11021],{"class":3435},[1291,12837,12838,12840],{"class":3427,"line":8007},[1291,12839,11026],{"class":3439},[1291,12841,11021],{"class":3435},[1291,12843,12844],{"class":3427,"line":8018},[1291,12845,11033],{"class":3439},[1291,12847,12848,12850],{"class":3427,"line":8029},[1291,12849,11038],{"class":3439},[1291,12851,11041],{"class":3451},[1291,12853,12854],{"class":3427,"line":8040},[1291,12855,3526],{"emptyLinePlaceholder":35},[1291,12857,12858,12860],{"class":3427,"line":8051},[1291,12859,11050],{"class":3439},[1291,12861,6019],{"class":3451},[1291,12863,12864],{"class":3427,"line":8057},[1291,12865,3526],{"emptyLinePlaceholder":3
5},[1291,12867,12868,12870],{"class":3427,"line":8068},[1291,12869,11061],{"class":3439},[1291,12871,6035],{"class":3435},[1291,12873,12874],{"class":3427,"line":8079},[1291,12875,3526],{"emptyLinePlaceholder":35},[1291,12877,12878,12880,12882,12884],{"class":3427,"line":8090},[1291,12879,11077],{"class":3431},[1291,12881,3738],{"class":3435},[1291,12883,4654],{"class":3812},[1291,12885,3874],{"class":3435},[1291,12887,12888,12890,12892,12894],{"class":3427,"line":8101},[1291,12889,10105],{"class":3819},[1291,12891,3738],{"class":3435},[1291,12893,3627],{"class":3812},[1291,12895,4107],{"class":3435},[1291,12897,12898,12900,12902,12904],{"class":3427,"line":8112},[1291,12899,11098],{"class":3819},[1291,12901,3738],{"class":3435},[1291,12903,11103],{"class":3812},[1291,12905,4107],{"class":3435},[1291,12907,12908,12910,12912,12914],{"class":3427,"line":8117},[1291,12909,11110],{"class":3819},[1291,12911,3738],{"class":3435},[1291,12913,11006],{"class":3812},[1291,12915,4107],{"class":3435},[1291,12917,12918],{"class":3427,"line":8128},[1291,12919,3827],{"class":3435},[1291,12921,12922],{"class":3427,"line":8139},[1291,12923,3526],{"emptyLinePlaceholder":35},[1291,12925,12926],{"class":3427,"line":8150},[1291,12927,3526],{"emptyLinePlaceholder":35},[1291,12929,12930,12932,12934,12936,12938,12940,12942,12944,12946,12948],{"class":3427,"line":8156},[1291,12931,5536],{"class":3431},[1291,12933,3738],{"class":3435},[1291,12935,11226],{"class":3812},[1291,12937,3816],{"class":3435},[1291,12939,11169],{"class":3812},[1291,12941,3566],{"class":3435},[1291,12943,11235],{"class":3812},[1291,12945,3566],{"class":3435},[1291,12947,11240],{"class":3812},[1291,12949,3827],{"class":3435},[1291,12951,12952],{"class":3427,"line":8162},[1291,12953,3526],{"emptyLinePlaceholder":35},[1291,12955,12956,12958,12960,12962,12964,12966,12968,12970,12972,12974,12976,12978,12980,12982,12984,12986,12988,12990],{"class":3427,"line":8168},[1291,12957,11251],{"class":3431},[1291,12959,3738],{"class
":3435},[1291,12961,11256],{"class":3431},[1291,12963,694],{"class":3435},[1291,12965,11261],{"class":3812},[1291,12967,3816],{"class":3435},[1291,12969,4813],{"class":3819},[1291,12971,3738],{"class":3435},[1291,12973,5580],{"class":3812},[1291,12975,694],{"class":3435},[1291,12977,11274],{"class":3457},[1291,12979,3566],{"class":3435},[1291,12981,11279],{"class":3819},[1291,12983,3738],{"class":3435},[1291,12985,11284],{"class":6356},[1291,12987,3816],{"class":3435},[1291,12989,11289],{"class":3819},[1291,12991,11292],{"class":3435},[3418,12993,12994],{"className":3420,"code":11298,"language":3422,"meta":23,"style":23},[3061,12995,12996],{"__ignoreMap":23},[1291,12997,12998,13000,13002,13004],{"class":3427,"line":3428},[1291,12999,11305],{"class":3431},[1291,13001,694],{"class":3435},[1291,13003,4868],{"class":3812},[1291,13005,4871],{"class":3435},[3418,13007,13009],{"className":3420,"code":13008,"language":3422,"meta":23,"style":23},"pathway_client = RAGClient(pathway_host, pathway_port)\npathway_client.list_documents()\n",[3061,13010,13011,13029],{"__ignoreMap":23},[1291,13012,13013,13015,13017,13019,13021,13023,13025,13027],{"class":3427,"line":3428},[1291,13014,11353],{"class":3431},[1291,13016,3738],{"class":3435},[1291,13018,4918],{"class":3812},[1291,13020,3816],{"class":3435},[1291,13022,11169],{"class":3812},[1291,13024,3566],{"class":3435},[1291,13026,11235],{"class":3812},[1291,13028,3827],{"class":3435},[1291,13030,13031,13033,13035,13037],{"class":3427,"line":24},[1291,13032,11372],{"class":3431},[1291,13034,694],{"class":3435},[1291,13036,11377],{"class":3812},[1291,13038,4871],{"class":3435},[3418,13040,13042],{"className":3420,"code":13041,"language":3422,"meta":23,"style":23},"predicted_dataset_hybrid_index = predict_test_dataset(dataset)\n",[3061,13043,13044],{"__ignoreMap":23},[1291,13045,13046,13049,13051,13053,13055,13057],{"class":3427,"line":3428},[1291,13047,13048],{"class":3431},"predicted_dataset_hybrid_index 
",[1291,13050,3738],{"class":3435},[1291,13052,11401],{"class":3812},[1291,13054,3816],{"class":3435},[1291,13056,9256],{"class":3812},[1291,13058,3827],{"class":3435},[3418,13060,13062],{"className":3420,"code":13061,"language":3422,"meta":23,"style":23},"ragas_evals_dataset_hybrid_index = run_ragas_evaluations(predicted_dataset_hybrid_index)\n",[3061,13063,13064],{"__ignoreMap":23},[1291,13065,13066,13069,13071,13073,13075,13078],{"class":3427,"line":3428},[1291,13067,13068],{"class":3431},"ragas_evals_dataset_hybrid_index ",[1291,13070,3738],{"class":3435},[1291,13072,11937],{"class":3812},[1291,13074,3816],{"class":3435},[1291,13076,13077],{"class":3812},"predicted_dataset_hybrid_index",[1291,13079,3827],{"class":3435},[3418,13081,13083],{"className":3420,"code":13082,"language":3422,"meta":23,"style":23},"ragas_evals_dataset_hybrid_index\n",[3061,13084,13085],{"__ignoreMap":23},[1291,13086,13087],{"class":3427,"line":3428},[1291,13088,13082],{"class":3431},[3418,13090,13093],{"className":13091,"code":13092,"language":4999},[4997],"{'answer_correctness': 0.5821, 'faithfulness': 0.5228, 'context_recall': 0.8966, 'context_precision': 0.8343}\n",[3061,13094,13092],{"__ignoreMap":23},[73,13096,13097,13098,13101],{},"We see that just by introducing hybrid retrieval, we improved the correctness metric by ",[3061,13099,13100],{},"10%",". 
Let's see if we can improve on that.",[3418,13103,13105],{"className":3420,"code":13104,"language":3422,"meta":23,"style":23},"predicted_dataset_hybrid_index.to_pandas()\n",[3061,13106,13107],{"__ignoreMap":23},[1291,13108,13109,13111,13113,13115],{"class":3427,"line":3428},[1291,13110,13077],{"class":3431},[1291,13112,694],{"class":3435},[1291,13114,10338],{"class":3812},[1291,13116,4871],{"class":3435},[3418,13118,13120],{"className":3420,"code":13119,"language":3422,"meta":23,"style":23},"# terminate the Pathway app\n\nserver_process.terminate()\nserver_process.join()\n\nclear_pathway_graph()\n",[3061,13121,13122,13127,13131,13141,13151,13155],{"__ignoreMap":23},[1291,13123,13124],{"class":3427,"line":3428},[1291,13125,13126],{"class":3673},"# terminate the Pathway app\n",[1291,13128,13129],{"class":3427,"line":24},[1291,13130,3526],{"emptyLinePlaceholder":35},[1291,13132,13133,13135,13137,13139],{"class":3427,"line":675},[1291,13134,11305],{"class":3431},[1291,13136,694],{"class":3435},[1291,13138,12364],{"class":3812},[1291,13140,4871],{"class":3435},[1291,13142,13143,13145,13147,13149],{"class":3427,"line":3542},[1291,13144,11305],{"class":3431},[1291,13146,694],{"class":3435},[1291,13148,9544],{"class":3812},[1291,13150,4871],{"class":3435},[1291,13152,13153],{"class":3427,"line":3547},[1291,13154,3526],{"emptyLinePlaceholder":35},[1291,13156,13157,13159],{"class":3427,"line":3572},[1291,13158,12449],{"class":3812},[1291,13160,4871],{"class":3435},[3189,13162,13164],{"id":13163},"using-a-different-parser","Using a Different Parser",[73,13166,13167,13168,13172],{},"Parsing is a crucial yet often overlooked component of RAG solutions. The quality of your retrieval depends heavily on how well your data is parsed—garbage in, garbage out. A robust parser can significantly enhance your solution, while a poor one can break it. Pathway provides several ready-to-use parsers out of the box, see the ",[77,13169,13171],{"href":10443,"rel":13170},[81],"documentation",". 
You also have the flexibility to develop and integrate custom parsers tailored to your specific needs.",[3418,13174,13176],{"className":3420,"code":13175,"language":3422,"meta":23,"style":23},"folder = pw.io.fs.read(\n    path=INPUT_FOLDER,\n    format=\"binary\",\n    with_metadata=True,\n)\n\nsources = [folder]\n\nparser = parsers.PypdfParser()\n\ntext_splitter = None\n\nembedder = embedders.OpenAIEmbedder(\n    cache_strategy=DiskCache(), retry_strategy=pw.udfs.ExponentialBackoffRetryStrategy()\n)\n\nhybrid_index = HybridIndexFactory(\n    [\n        TantivyBM25Factory(),\n        BruteForceKnnFactory(embedder=embedder),\n    ]\n)\n\nllm = llms.OpenAIChat(model=\"gpt-4o\", cache_strategy=DiskCache())\n\ndocument_store = DocumentStore(\n    docs=sources, parser=parser, splitter=text_splitter, retriever_factory=hybrid_index\n)\n\nprompt_template: str = \"\"\"You are an assistant for question-answering tasks. \\\nUse the following pieces of retrieved context to answer the question. \\\nIf you don't know the answer, just say that you don't know.\n  Question: {query}\n\n  Context: {context}\n\n  Answer:\"\"\"\n\nrag_app = BaseRAGQuestionAnswerer(\n    llm=llm,\n    indexer=document_store,\n    prompt_template=prompt_template,\n)\n\n\nserver = QASummaryRestServer(pathway_host, pathway_port, rag_app)\n\nserver_process = multiprocessing.Process(target=server.run, 
kwargs=dict(threaded=False))\n",[3061,13177,13178,13200,13210,13224,13230,13234,13238,13250,13254,13268,13272,13281,13285,13299,13325,13329,13333,13343,13347,13353,13367,13371,13375,13379,13413,13417,13427,13459,13463,13467,13483,13489,13493,13499,13503,13509,13513,13519,13523,13533,13543,13553,13563,13567,13571,13575,13597,13601],{"__ignoreMap":23},[1291,13179,13180,13182,13184,13186,13188,13190,13192,13194,13196,13198],{"class":3427,"line":3428},[1291,13181,4068],{"class":3431},[1291,13183,3738],{"class":3435},[1291,13185,4073],{"class":3431},[1291,13187,694],{"class":3435},[1291,13189,4078],{"class":3457},[1291,13191,694],{"class":3435},[1291,13193,4083],{"class":3457},[1291,13195,694],{"class":3435},[1291,13197,4088],{"class":3812},[1291,13199,3874],{"class":3435},[1291,13201,13202,13204,13206,13208],{"class":3427,"line":24},[1291,13203,4095],{"class":3819},[1291,13205,3738],{"class":3435},[1291,13207,9745],{"class":3812},[1291,13209,4107],{"class":3435},[1291,13211,13212,13214,13216,13218,13220,13222],{"class":3427,"line":675},[1291,13213,4112],{"class":3819},[1291,13215,3738],{"class":3435},[1291,13217,3691],{"class":3435},[1291,13219,4119],{"class":3439},[1291,13221,3691],{"class":3435},[1291,13223,4107],{"class":3435},[1291,13225,13226,13228],{"class":3427,"line":3542},[1291,13227,4128],{"class":3819},[1291,13229,4131],{"class":3435},[1291,13231,13232],{"class":3427,"line":3547},[1291,13233,3827],{"class":3435},[1291,13235,13236],{"class":3427,"line":3572},[1291,13237,3526],{"emptyLinePlaceholder":35},[1291,13239,13240,13242,13244,13246,13248],{"class":3427,"line":3614},[1291,13241,4140],{"class":3431},[1291,13243,3738],{"class":3435},[1291,13245,4145],{"class":3435},[1291,13247,4148],{"class":3431},[1291,13249,5267],{"class":3435},[1291,13251,13252],{"class":3427,"line":3640},[1291,13253,3526],{"emptyLinePlaceholder":35},[1291,13255,13256,13258,13260,13262,13264,13266],{"class":3427,"line":3665},[1291,13257,4522],{"class":3431},[1291,13259,3738],{"class":34
35},[1291,13261,3601],{"class":3431},[1291,13263,694],{"class":3435},[1291,13265,10357],{"class":3812},[1291,13267,4871],{"class":3435},[1291,13269,13270],{"class":3427,"line":3670},[1291,13271,3526],{"emptyLinePlaceholder":35},[1291,13273,13274,13276,13278],{"class":3427,"line":3677},[1291,13275,5370],{"class":3431},[1291,13277,3738],{"class":3435},[1291,13279,13280],{"class":3435}," None\n",[1291,13282,13283],{"class":3427,"line":3877},[1291,13284,3526],{"emptyLinePlaceholder":35},[1291,13286,13287,13289,13291,13293,13295,13297],{"class":3427,"line":3916},[1291,13288,4292],{"class":3431},[1291,13290,3738],{"class":3435},[1291,13292,3591],{"class":3431},[1291,13294,694],{"class":3435},[1291,13296,10843],{"class":3812},[1291,13298,3874],{"class":3435},[1291,13300,13301,13303,13305,13307,13309,13311,13313,13315,13317,13319,13321,13323],{"class":3427,"line":4519},[1291,13302,10850],{"class":3819},[1291,13304,3738],{"class":3435},[1291,13306,10855],{"class":3812},[1291,13308,10858],{"class":3435},[1291,13310,10861],{"class":3819},[1291,13312,3738],{"class":3435},[1291,13314,3841],{"class":3812},[1291,13316,694],{"class":3435},[1291,13318,10870],{"class":3457},[1291,13320,694],{"class":3435},[1291,13322,4219],{"class":3812},[1291,13324,4871],{"class":3435},[1291,13326,13327],{"class":3427,"line":6038},[1291,13328,3827],{"class":3435},[1291,13330,13331],{"class":3427,"line":6043},[1291,13332,3526],{"emptyLinePlaceholder":35},[1291,13334,13335,13337,13339,13341],{"class":3427,"line":6066},[1291,13336,12683],{"class":3431},[1291,13338,3738],{"class":3435},[1291,13340,12688],{"class":3812},[1291,13342,3874],{"class":3435},[1291,13344,13345],{"class":3427,"line":6078},[1291,13346,12695],{"class":3435},[1291,13348,13349,13351],{"class":3427,"line":6089},[1291,13350,12700],{"class":3812},[1291,13352,12703],{"class":3435},[1291,13354,13355,13357,13359,13361,13363,13365],{"class":3427,"line":6124},[1291,13356,12708],{"class":3812},[1291,13358,3816],{"class":3435},[1291,13360,459
7],{"class":3819},[1291,13362,3738],{"class":3435},[1291,13364,4597],{"class":3812},[1291,13366,4242],{"class":3435},[1291,13368,13369],{"class":3427,"line":6133},[1291,13370,12238],{"class":3435},[1291,13372,13373],{"class":3427,"line":6141},[1291,13374,3827],{"class":3435},[1291,13376,13377],{"class":3427,"line":6151},[1291,13378,3526],{"emptyLinePlaceholder":35},[1291,13380,13381,13383,13385,13387,13389,13391,13393,13395,13397,13399,13401,13403,13405,13407,13409,13411],{"class":3427,"line":6923},[1291,13382,3586],{"class":3431},[1291,13384,3738],{"class":3435},[1291,13386,3596],{"class":3431},[1291,13388,694],{"class":3435},[1291,13390,10920],{"class":3812},[1291,13392,3816],{"class":3435},[1291,13394,10049],{"class":3819},[1291,13396,3738],{"class":3435},[1291,13398,3691],{"class":3435},[1291,13400,7849],{"class":3439},[1291,13402,3691],{"class":3435},[1291,13404,3566],{"class":3435},[1291,13406,10937],{"class":3819},[1291,13408,3738],{"class":3435},[1291,13410,10855],{"class":3812},[1291,13412,6237],{"class":3435},[1291,13414,13415],{"class":3427,"line":6928},[1291,13416,3526],{"emptyLinePlaceholder":35},[1291,13418,13419,13421,13423,13425],{"class":3427,"line":6934},[1291,13420,10614],{"class":3431},[1291,13422,3738],{"class":3435},[1291,13424,10956],{"class":3812},[1291,13426,3874],{"class":3435},[1291,13428,13429,13431,13433,13435,13437,13439,13441,13443,13445,13447,13449,13451,13453,13455,13457],{"class":3427,"line":6940},[1291,13430,10164],{"class":3819},[1291,13432,3738],{"class":3435},[1291,13434,4585],{"class":3812},[1291,13436,3566],{"class":3435},[1291,13438,9401],{"class":3819},[1291,13440,3738],{"class":3435},[1291,13442,4621],{"class":3812},[1291,13444,3566],{"class":3435},[1291,13446,5566],{"class":3819},[1291,13448,3738],{"class":3435},[1291,13450,10983],{"class":3812},[1291,13452,3566],{"class":3435},[1291,13454,10988],{"class":3819},[1291,13456,3738],{"class":3435},[1291,13458,12811],{"class":3812},[1291,13460,13461],{"class":3427,"line":6952},
[1291,13462,3827],{"class":3435},[1291,13464,13465],{"class":3427,"line":6984},[1291,13466,3526],{"emptyLinePlaceholder":35},[1291,13468,13469,13471,13473,13475,13477,13479,13481],{"class":3427,"line":7996},[1291,13470,11006],{"class":3431},[1291,13472,4390],{"class":3435},[1291,13474,9387],{"class":6356},[1291,13476,3702],{"class":3435},[1291,13478,11015],{"class":3435},[1291,13480,11018],{"class":3439},[1291,13482,11021],{"class":3435},[1291,13484,13485,13487],{"class":3427,"line":8007},[1291,13486,11026],{"class":3439},[1291,13488,11021],{"class":3435},[1291,13490,13491],{"class":3427,"line":8018},[1291,13492,11033],{"class":3439},[1291,13494,13495,13497],{"class":3427,"line":8029},[1291,13496,11038],{"class":3439},[1291,13498,11041],{"class":3451},[1291,13500,13501],{"class":3427,"line":8040},[1291,13502,3526],{"emptyLinePlaceholder":35},[1291,13504,13505,13507],{"class":3427,"line":8051},[1291,13506,11050],{"class":3439},[1291,13508,6019],{"class":3451},[1291,13510,13511],{"class":3427,"line":8057},[1291,13512,3526],{"emptyLinePlaceholder":35},[1291,13514,13515,13517],{"class":3427,"line":8068},[1291,13516,11061],{"class":3439},[1291,13518,6035],{"class":3435},[1291,13520,13521],{"class":3427,"line":8079},[1291,13522,3526],{"emptyLinePlaceholder":35},[1291,13524,13525,13527,13529,13531],{"class":3427,"line":8090},[1291,13526,11077],{"class":3431},[1291,13528,3738],{"class":3435},[1291,13530,4654],{"class":3812},[1291,13532,3874],{"class":3435},[1291,13534,13535,13537,13539,13541],{"class":3427,"line":8101},[1291,13536,10105],{"class":3819},[1291,13538,3738],{"class":3435},[1291,13540,3627],{"class":3812},[1291,13542,4107],{"class":3435},[1291,13544,13545,13547,13549,13551],{"class":3427,"line":8112},[1291,13546,11098],{"class":3819},[1291,13548,3738],{"class":3435},[1291,13550,11103],{"class":3812},[1291,13552,4107],{"class":3435},[1291,13554,13555,13557,13559,13561],{"class":3427,"line":8117},[1291,13556,11110],{"class":3819},[1291,13558,3738],{"class":3435},[
1291,13560,11006],{"class":3812},[1291,13562,4107],{"class":3435},[1291,13564,13565],{"class":3427,"line":8128},[1291,13566,3827],{"class":3435},[1291,13568,13569],{"class":3427,"line":8139},[1291,13570,3526],{"emptyLinePlaceholder":35},[1291,13572,13573],{"class":3427,"line":8150},[1291,13574,3526],{"emptyLinePlaceholder":35},[1291,13576,13577,13579,13581,13583,13585,13587,13589,13591,13593,13595],{"class":3427,"line":8156},[1291,13578,5536],{"class":3431},[1291,13580,3738],{"class":3435},[1291,13582,11226],{"class":3812},[1291,13584,3816],{"class":3435},[1291,13586,11169],{"class":3812},[1291,13588,3566],{"class":3435},[1291,13590,11235],{"class":3812},[1291,13592,3566],{"class":3435},[1291,13594,11240],{"class":3812},[1291,13596,3827],{"class":3435},[1291,13598,13599],{"class":3427,"line":8162},[1291,13600,3526],{"emptyLinePlaceholder":35},[1291,13602,13603,13605,13607,13609,13611,13613,13615,13617,13619,13621,13623,13625,13627,13629,13631,13633,13635,13637],{"class":3427,"line":8168},[1291,13604,11251],{"class":3431},[1291,13606,3738],{"class":3435},[1291,13608,11256],{"class":3431},[1291,13610,694],{"class":3435},[1291,13612,11261],{"class":3812},[1291,13614,3816],{"class":3435},[1291,13616,4813],{"class":3819},[1291,13618,3738],{"class":3435},[1291,13620,5580],{"class":3812},[1291,13622,694],{"class":3435},[1291,13624,11274],{"class":3457},[1291,13626,3566],{"class":3435},[1291,13628,11279],{"class":3819},[1291,13630,3738],{"class":3435},[1291,13632,11284],{"class":6356},[1291,13634,3816],{"class":3435},[1291,13636,11289],{"class":3819},[1291,13638,11292],{"class":3435},[3418,13640,13641],{"className":3420,"code":11298,"language":3422,"meta":23,"style":23},[3061,13642,13643],{"__ignoreMap":23},[1291,13644,13645,13647,13649,13651],{"class":3427,"line":3428},[1291,13646,11305],{"class":3431},[1291,13648,694],{"class":3435},[1291,13650,4868],{"class":3812},[1291,13652,4871],{"class":3435},[3418,13654,13655],{"className":3420,"code":13008,"language":3422,"meta":23
,"style":23},[3061,13656,13657,13675],{"__ignoreMap":23},[1291,13658,13659,13661,13663,13665,13667,13669,13671,13673],{"class":3427,"line":3428},[1291,13660,11353],{"class":3431},[1291,13662,3738],{"class":3435},[1291,13664,4918],{"class":3812},[1291,13666,3816],{"class":3435},[1291,13668,11169],{"class":3812},[1291,13670,3566],{"class":3435},[1291,13672,11235],{"class":3812},[1291,13674,3827],{"class":3435},[1291,13676,13677,13679,13681,13683],{"class":3427,"line":24},[1291,13678,11372],{"class":3431},[1291,13680,694],{"class":3435},[1291,13682,11377],{"class":3812},[1291,13684,4871],{"class":3435},[3418,13686,13688],{"className":3420,"code":13687,"language":3422,"meta":23,"style":23},"predicted_dataset_pypdf_parser = predict_test_dataset(dataset)\nragas_evals_dataset_pypdf_parser = run_ragas_evaluations(predicted_dataset_pypdf_parser)\n",[3061,13689,13690,13705],{"__ignoreMap":23},[1291,13691,13692,13695,13697,13699,13701,13703],{"class":3427,"line":3428},[1291,13693,13694],{"class":3431},"predicted_dataset_pypdf_parser ",[1291,13696,3738],{"class":3435},[1291,13698,11401],{"class":3812},[1291,13700,3816],{"class":3435},[1291,13702,9256],{"class":3812},[1291,13704,3827],{"class":3435},[1291,13706,13707,13710,13712,13714,13716,13719],{"class":3427,"line":24},[1291,13708,13709],{"class":3431},"ragas_evals_dataset_pypdf_parser ",[1291,13711,3738],{"class":3435},[1291,13713,11937],{"class":3812},[1291,13715,3816],{"class":3435},[1291,13717,13718],{"class":3812},"predicted_dataset_pypdf_parser",[1291,13720,3827],{"class":3435},[3418,13722,13724],{"className":3420,"code":13723,"language":3422,"meta":23,"style":23},"ragas_evals_dataset_pypdf_parser\n",[3061,13725,13726],{"__ignoreMap":23},[1291,13727,13728],{"class":3427,"line":3428},[1291,13729,13723],{"class":3431},[3418,13731,13734],{"className":13732,"code":13733,"language":4999},[4997],"{'answer_correctness': 0.6896, 'faithfulness': 0.6609, 'context_recall': 0.9088, 'context_precision': 
0.8035}\n",[3061,13735,13733],{"__ignoreMap":23},[73,13737,13738],{},"This had quite an impact! We managed to improve our last best score by more than 30%.",[3418,13740,13741],{"className":3420,"code":13119,"language":3422,"meta":23,"style":23},[3061,13742,13743,13747,13751,13761,13771,13775],{"__ignoreMap":23},[1291,13744,13745],{"class":3427,"line":3428},[1291,13746,13126],{"class":3673},[1291,13748,13749],{"class":3427,"line":24},[1291,13750,3526],{"emptyLinePlaceholder":35},[1291,13752,13753,13755,13757,13759],{"class":3427,"line":675},[1291,13754,11305],{"class":3431},[1291,13756,694],{"class":3435},[1291,13758,12364],{"class":3812},[1291,13760,4871],{"class":3435},[1291,13762,13763,13765,13767,13769],{"class":3427,"line":3542},[1291,13764,11305],{"class":3431},[1291,13766,694],{"class":3435},[1291,13768,9544],{"class":3812},[1291,13770,4871],{"class":3435},[1291,13772,13773],{"class":3427,"line":3547},[1291,13774,3526],{"emptyLinePlaceholder":35},[1291,13776,13777,13779],{"class":3427,"line":3572},[1291,13778,12449],{"class":3812},[1291,13780,4871],{"class":3435},[3189,13782,13784],{"id":13783},"lets-try-the-same-parser-with-the-semantic-search-retriever","Let's Try the Same Parser with the Semantic Search Retriever",[3418,13786,13788],{"className":3420,"code":13787,"language":3422,"meta":23,"style":23},"folder = pw.io.fs.read(\n    path=INPUT_FOLDER,\n    format=\"binary\",\n    with_metadata=True,\n)\n\nsources = [folder]\n\nparser = parsers.PypdfParser()\n# pypdf parser splits documents by the page, so we don't need another splitter\ntext_splitter = None\n\nembedder = embedders.OpenAIEmbedder(\n    cache_strategy=DiskCache(), retry_strategy=pw.udfs.ExponentialBackoffRetryStrategy()\n)\n\nllm = llms.OpenAIChat(model=\"gpt-4o\", cache_strategy=DiskCache())\n\ndocument_store = DocumentStore(\n    docs=sources,\n    parser=parser,\n    splitter=text_splitter,\n    retriever_factory=BruteForceKnnFactory(embedder=embedder),\n)\n\nprompt_template: str = \"\"\"You 
are an assistant for question-answering tasks. \\\nUse the following pieces of retrieved context to answer the question. \\\nIf you don't know the answer, just say that you don't know.\n  Question: {query}\n\n  Context: {context}\n\n  Answer:\"\"\"\n\nrag_app = BaseRAGQuestionAnswerer(\n    llm=llm,\n    indexer=document_store,\n    prompt_template=prompt_template,\n)\n\n\nserver = QASummaryRestServer(pathway_host, pathway_port, rag_app)\n\nserver_process = multiprocessing.Process(target=server.run, kwargs=dict(threaded=False))\n",[3061,13789,13790,13812,13822,13836,13842,13846,13850,13862,13866,13880,13885,13893,13897,13911,13937,13941,13945,13979,13983,13993,14003,14013,14023,14043,14047,14051,14067,14073,14077,14083,14087,14093,14097,14103,14107,14117,14127,14137,14147,14151,14155,14159,14181,14185],{"__ignoreMap":23},[1291,13791,13792,13794,13796,13798,13800,13802,13804,13806,13808,13810],{"class":3427,"line":3428},[1291,13793,4068],{"class":3431},[1291,13795,3738],{"class":3435},[1291,13797,4073],{"class":3431},[1291,13799,694],{"class":3435},[1291,13801,4078],{"class":3457},[1291,13803,694],{"class":3435},[1291,13805,4083],{"class":3457},[1291,13807,694],{"class":3435},[1291,13809,4088],{"class":3812},[1291,13811,3874],{"class":3435},[1291,13813,13814,13816,13818,13820],{"class":3427,"line":24},[1291,13815,4095],{"class":3819},[1291,13817,3738],{"class":3435},[1291,13819,9745],{"class":3812},[1291,13821,4107],{"class":3435},[1291,13823,13824,13826,13828,13830,13832,13834],{"class":3427,"line":675},[1291,13825,4112],{"class":3819},[1291,13827,3738],{"class":3435},[1291,13829,3691],{"class":3435},[1291,13831,4119],{"class":3439},[1291,13833,3691],{"class":3435},[1291,13835,4107],{"class":3435},[1291,13837,13838,13840],{"class":3427,"line":3542},[1291,13839,4128],{"class":3819},[1291,13841,4131],{"class":3435},[1291,13843,13844],{"class":3427,"line":3547},[1291,13845,3827],{"class":3435},[1291,13847,13848],{"class":3427,"line":3572},[1291,13849,3526],{"emptyLineP
laceholder":35},[1291,13851,13852,13854,13856,13858,13860],{"class":3427,"line":3614},[1291,13853,4140],{"class":3431},[1291,13855,3738],{"class":3435},[1291,13857,4145],{"class":3435},[1291,13859,4148],{"class":3431},[1291,13861,5267],{"class":3435},[1291,13863,13864],{"class":3427,"line":3640},[1291,13865,3526],{"emptyLinePlaceholder":35},[1291,13867,13868,13870,13872,13874,13876,13878],{"class":3427,"line":3665},[1291,13869,4522],{"class":3431},[1291,13871,3738],{"class":3435},[1291,13873,3601],{"class":3431},[1291,13875,694],{"class":3435},[1291,13877,10357],{"class":3812},[1291,13879,4871],{"class":3435},[1291,13881,13882],{"class":3427,"line":3670},[1291,13883,13884],{"class":3673},"# pypdf parser splits documents by the page, so we don't need another splitter\n",[1291,13886,13887,13889,13891],{"class":3427,"line":3677},[1291,13888,5370],{"class":3431},[1291,13890,3738],{"class":3435},[1291,13892,13280],{"class":3435},[1291,13894,13895],{"class":3427,"line":3877},[1291,13896,3526],{"emptyLinePlaceholder":35},[1291,13898,13899,13901,13903,13905,13907,13909],{"class":3427,"line":3916},[1291,13900,4292],{"class":3431},[1291,13902,3738],{"class":3435},[1291,13904,3591],{"class":3431},[1291,13906,694],{"class":3435},[1291,13908,10843],{"class":3812},[1291,13910,3874],{"class":3435},[1291,13912,13913,13915,13917,13919,13921,13923,13925,13927,13929,13931,13933,13935],{"class":3427,"line":4519},[1291,13914,10850],{"class":3819},[1291,13916,3738],{"class":3435},[1291,13918,10855],{"class":3812},[1291,13920,10858],{"class":3435},[1291,13922,10861],{"class":3819},[1291,13924,3738],{"class":3435},[1291,13926,3841],{"class":3812},[1291,13928,694],{"class":3435},[1291,13930,10870],{"class":3457},[1291,13932,694],{"class":3435},[1291,13934,4219],{"class":3812},[1291,13936,4871],{"class":3435},[1291,13938,13939],{"class":3427,"line":6038},[1291,13940,3827],{"class":3435},[1291,13942,13943],{"class":3427,"line":6043},[1291,13944,3526],{"emptyLinePlaceholder":35},[1291,13946,13
947,13949,13951,13953,13955,13957,13959,13961,13963,13965,13967,13969,13971,13973,13975,13977],{"class":3427,"line":6066},[1291,13948,3586],{"class":3431},[1291,13950,3738],{"class":3435},[1291,13952,3596],{"class":3431},[1291,13954,694],{"class":3435},[1291,13956,10920],{"class":3812},[1291,13958,3816],{"class":3435},[1291,13960,10049],{"class":3819},[1291,13962,3738],{"class":3435},[1291,13964,3691],{"class":3435},[1291,13966,7849],{"class":3439},[1291,13968,3691],{"class":3435},[1291,13970,3566],{"class":3435},[1291,13972,10937],{"class":3819},[1291,13974,3738],{"class":3435},[1291,13976,10855],{"class":3812},[1291,13978,6237],{"class":3435},[1291,13980,13981],{"class":3427,"line":6078},[1291,13982,3526],{"emptyLinePlaceholder":35},[1291,13984,13985,13987,13989,13991],{"class":3427,"line":6089},[1291,13986,10614],{"class":3431},[1291,13988,3738],{"class":3435},[1291,13990,10956],{"class":3812},[1291,13992,3874],{"class":3435},[1291,13994,13995,13997,13999,14001],{"class":3427,"line":6124},[1291,13996,10164],{"class":3819},[1291,13998,3738],{"class":3435},[1291,14000,4585],{"class":3812},[1291,14002,4107],{"class":3435},[1291,14004,14005,14007,14009,14011],{"class":3427,"line":6133},[1291,14006,4616],{"class":3819},[1291,14008,3738],{"class":3435},[1291,14010,4621],{"class":3812},[1291,14012,4107],{"class":3435},[1291,14014,14015,14017,14019,14021],{"class":3427,"line":6141},[1291,14016,4604],{"class":3819},[1291,14018,3738],{"class":3435},[1291,14020,10983],{"class":3812},[1291,14022,4107],{"class":3435},[1291,14024,14025,14028,14030,14033,14035,14037,14039,14041],{"class":3427,"line":6151},[1291,14026,14027],{"class":3819},"    
retriever_factory",[1291,14029,3738],{"class":3435},[1291,14031,14032],{"class":3812},"BruteForceKnnFactory",[1291,14034,3816],{"class":3435},[1291,14036,4597],{"class":3819},[1291,14038,3738],{"class":3435},[1291,14040,4597],{"class":3812},[1291,14042,4242],{"class":3435},[1291,14044,14045],{"class":3427,"line":6923},[1291,14046,3827],{"class":3435},[1291,14048,14049],{"class":3427,"line":6928},[1291,14050,3526],{"emptyLinePlaceholder":35},[1291,14052,14053,14055,14057,14059,14061,14063,14065],{"class":3427,"line":6934},[1291,14054,11006],{"class":3431},[1291,14056,4390],{"class":3435},[1291,14058,9387],{"class":6356},[1291,14060,3702],{"class":3435},[1291,14062,11015],{"class":3435},[1291,14064,11018],{"class":3439},[1291,14066,11021],{"class":3435},[1291,14068,14069,14071],{"class":3427,"line":6940},[1291,14070,11026],{"class":3439},[1291,14072,11021],{"class":3435},[1291,14074,14075],{"class":3427,"line":6952},[1291,14076,11033],{"class":3439},[1291,14078,14079,14081],{"class":3427,"line":6984},[1291,14080,11038],{"class":3439},[1291,14082,11041],{"class":3451},[1291,14084,14085],{"class":3427,"line":7996},[1291,14086,3526],{"emptyLinePlaceholder":35},[1291,14088,14089,14091],{"class":3427,"line":8007},[1291,14090,11050],{"class":3439},[1291,14092,6019],{"class":3451},[1291,14094,14095],{"class":3427,"line":8018},[1291,14096,3526],{"emptyLinePlaceholder":35},[1291,14098,14099,14101],{"class":3427,"line":8029},[1291,14100,11061],{"class":3439},[1291,14102,6035],{"class":3435},[1291,14104,14105],{"class":3427,"line":8040},[1291,14106,3526],{"emptyLinePlaceholder":35},[1291,14108,14109,14111,14113,14115],{"class":3427,"line":8051},[1291,14110,11077],{"class":3431},[1291,14112,3738],{"class":3435},[1291,14114,4654],{"class":3812},[1291,14116,3874],{"class":3435},[1291,14118,14119,14121,14123,14125],{"class":3427,"line":8057},[1291,14120,10105],{"class":3819},[1291,14122,3738],{"class":3435},[1291,14124,3627],{"class":3812},[1291,14126,4107],{"class":3435},[1291,1412
8,14129,14131,14133,14135],{"class":3427,"line":8068},[1291,14130,11098],{"class":3819},[1291,14132,3738],{"class":3435},[1291,14134,11103],{"class":3812},[1291,14136,4107],{"class":3435},[1291,14138,14139,14141,14143,14145],{"class":3427,"line":8079},[1291,14140,11110],{"class":3819},[1291,14142,3738],{"class":3435},[1291,14144,11006],{"class":3812},[1291,14146,4107],{"class":3435},[1291,14148,14149],{"class":3427,"line":8090},[1291,14150,3827],{"class":3435},[1291,14152,14153],{"class":3427,"line":8101},[1291,14154,3526],{"emptyLinePlaceholder":35},[1291,14156,14157],{"class":3427,"line":8112},[1291,14158,3526],{"emptyLinePlaceholder":35},[1291,14160,14161,14163,14165,14167,14169,14171,14173,14175,14177,14179],{"class":3427,"line":8117},[1291,14162,5536],{"class":3431},[1291,14164,3738],{"class":3435},[1291,14166,11226],{"class":3812},[1291,14168,3816],{"class":3435},[1291,14170,11169],{"class":3812},[1291,14172,3566],{"class":3435},[1291,14174,11235],{"class":3812},[1291,14176,3566],{"class":3435},[1291,14178,11240],{"class":3812},[1291,14180,3827],{"class":3435},[1291,14182,14183],{"class":3427,"line":8128},[1291,14184,3526],{"emptyLinePlaceholder":35},[1291,14186,14187,14189,14191,14193,14195,14197,14199,14201,14203,14205,14207,14209,14211,14213,14215,14217,14219,14221],{"class":3427,"line":8139},[1291,14188,11251],{"class":3431},[1291,14190,3738],{"class":3435},[1291,14192,11256],{"class":3431},[1291,14194,694],{"class":3435},[1291,14196,11261],{"class":3812},[1291,14198,3816],{"class":3435},[1291,14200,4813],{"class":3819},[1291,14202,3738],{"class":3435},[1291,14204,5580],{"class":3812},[1291,14206,694],{"class":3435},[1291,14208,11274],{"class":3457},[1291,14210,3566],{"class":3435},[1291,14212,11279],{"class":3819},[1291,14214,3738],{"class":3435},[1291,14216,11284],{"class":6356},[1291,14218,3816],{"class":3435},[1291,14220,11289],{"class":3819},[1291,14222,11292],{"class":3435},[3418,14224,14225],{"className":3420,"code":11298,"language":3422,"meta":23,"
style":23},[3061,14226,14227],{"__ignoreMap":23},[1291,14228,14229,14231,14233,14235],{"class":3427,"line":3428},[1291,14230,11305],{"class":3431},[1291,14232,694],{"class":3435},[1291,14234,4868],{"class":3812},[1291,14236,4871],{"class":3435},[3418,14238,14240],{"className":3420,"code":14239,"language":3422,"meta":23,"style":23},"predicted_dataset_semantic = predict_test_dataset(dataset)\nragas_evals_dataset_semantic = run_ragas_evaluations(predicted_dataset_semantic)\n",[3061,14241,14242,14257],{"__ignoreMap":23},[1291,14243,14244,14247,14249,14251,14253,14255],{"class":3427,"line":3428},[1291,14245,14246],{"class":3431},"predicted_dataset_semantic ",[1291,14248,3738],{"class":3435},[1291,14250,11401],{"class":3812},[1291,14252,3816],{"class":3435},[1291,14254,9256],{"class":3812},[1291,14256,3827],{"class":3435},[1291,14258,14259,14262,14264,14266,14268,14271],{"class":3427,"line":24},[1291,14260,14261],{"class":3431},"ragas_evals_dataset_semantic ",[1291,14263,3738],{"class":3435},[1291,14265,11937],{"class":3812},[1291,14267,3816],{"class":3435},[1291,14269,14270],{"class":3812},"predicted_dataset_semantic",[1291,14272,3827],{"class":3435},[3418,14274,14276],{"className":3420,"code":14275,"language":3422,"meta":23,"style":23},"ragas_evals_dataset_semantic\n",[3061,14277,14278],{"__ignoreMap":23},[1291,14279,14280],{"class":3427,"line":3428},[1291,14281,14275],{"class":3431},[3418,14283,14286],{"className":14284,"code":14285,"language":4999},[4997],"{'answer_correctness': 0.7026, 'faithfulness': 0.6485, 'context_recall': 0.9382, 'context_precision': 0.7915}\n",[3061,14287,14285],{"__ignoreMap":23},[73,14289,14290],{},"We see that correctness and recall both increased 
slightly.",[3418,14292,14293],{"className":3420,"code":13119,"language":3422,"meta":23,"style":23},[3061,14294,14295,14299,14303,14313,14323,14327],{"__ignoreMap":23},[1291,14296,14297],{"class":3427,"line":3428},[1291,14298,13126],{"class":3673},[1291,14300,14301],{"class":3427,"line":24},[1291,14302,3526],{"emptyLinePlaceholder":35},[1291,14304,14305,14307,14309,14311],{"class":3427,"line":675},[1291,14306,11305],{"class":3431},[1291,14308,694],{"class":3435},[1291,14310,12364],{"class":3812},[1291,14312,4871],{"class":3435},[1291,14314,14315,14317,14319,14321],{"class":3427,"line":3542},[1291,14316,11305],{"class":3431},[1291,14318,694],{"class":3435},[1291,14320,9544],{"class":3812},[1291,14322,4871],{"class":3435},[1291,14324,14325],{"class":3427,"line":3547},[1291,14326,3526],{"emptyLinePlaceholder":35},[1291,14328,14329,14331],{"class":3427,"line":3572},[1291,14330,12449],{"class":3812},[1291,14332,4871],{"class":3435},[3189,14334,9070],{"id":14335},"changing-the-embedder",[73,14337,14338,14339,14341,14342,14345,14346,14349],{},"Up until now, we had been using the OpenAI's ",[3061,14340,7929],{}," as the embedding model. You may also try the larger and better ",[3061,14343,14344],{},"text-embedding-3-large"," or the cheaper and smaller ",[3061,14347,14348],{},"text-embedding-3-small"," models.",[73,14351,14352,14353,694],{},"Although API based embedders are well performing and easy to use, if you are concerned about data privacy, you will need a locally hosted embedder. 
Pathway enables you to use local & open-source models through ",[77,14354,14357],{"href":14355,"rel":14356},"https:\u002F\u002Fpathway.com\u002Fdevelopers\u002Fapi-docs\u002Fpathway-xpacks-llm\u002Fembedders#pathway.xpacks.llm.embedders.SentenceTransformerEmbedder",[81],"embedders.SentenceTransformerEmbedder",[73,14359,14360,14361,14364,14365,14368,14369,14372,14373,14375],{},"Some of the popular open-source embedders are ",[3061,14362,14363],{},"gte-large-en-v1.5",", ",[3061,14366,14367],{},"bge-m3",", and ",[3061,14370,14371],{},"nomic-embed-text-v1.5",". We found that ",[3061,14374,14363],{}," usually produces good results, so let's try swapping the embedder with it.",[3418,14377,14379],{"className":3420,"code":14378,"language":3422,"meta":23,"style":23},"os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n",[3061,14380,14381],{"__ignoreMap":23},[1291,14382,14383,14385,14387,14389,14391,14393,14396,14398,14400,14402,14404,14407],{"class":3427,"line":3428},[1291,14384,3680],{"class":3431},[1291,14386,694],{"class":3435},[1291,14388,3685],{"class":3457},[1291,14390,3688],{"class":3435},[1291,14392,3691],{"class":3435},[1291,14394,14395],{"class":3439},"TOKENIZERS_PARALLELISM",[1291,14397,3691],{"class":3435},[1291,14399,3699],{"class":3435},[1291,14401,3702],{"class":3435},[1291,14403,3705],{"class":3435},[1291,14405,14406],{"class":3439},"false",[1291,14408,3746],{"class":3435},[3418,14410,14412],{"className":3420,"code":14411,"language":3422,"meta":23,"style":23},"folder = pw.io.fs.read(\n    path=INPUT_FOLDER,\n    format=\"binary\",\n    with_metadata=True,\n)\n\nsources = [folder]\n\nparser = parsers.PypdfParser()\n# pypdf parser splits documents by the page, so we don't need another splitter\ntext_splitter = None\n\nembedder = embedders.SentenceTransformerEmbedder(\n    model=\"Alibaba-NLP\u002Fgte-large-en-v1.5\",\n    call_kwargs={\"show_progress_bar\": False},\n    trust_remote_code=True,\n)\n\nllm = llms.OpenAIChat(model=\"gpt-4o\", 
cache_strategy=DiskCache())\n\ndocument_store = DocumentStore(\n    docs=sources,\n    parser=parser,\n    splitter=text_splitter,\n    retriever_factory=BruteForceKnnFactory(embedder=embedder),\n)\n\nprompt_template: str = \"\"\"You are an assistant for question-answering tasks. \\\nUse the following pieces of retrieved context to answer the question. \\\nIf you don't know the answer, just say that you don't know.\n  Question: {query}\n\n  Context: {context}\n\n  Answer:\"\"\"\n\nrag_app = BaseRAGQuestionAnswerer(\n    llm=llm,\n    indexer=document_store,\n    prompt_template=prompt_template,\n)\n\n\nserver = QASummaryRestServer(pathway_host, pathway_port, rag_app)\n\nserver_process = multiprocessing.Process(target=server.run, kwargs=dict(threaded=False))\n",[3061,14413,14414,14436,14446,14460,14466,14470,14474,14486,14490,14504,14508,14516,14520,14535,14550,14570,14577,14581,14585,14619,14623,14633,14643,14653,14663,14681,14685,14689,14705,14711,14715,14721,14725,14731,14735,14741,14745,14755,14765,14775,14785,14789,14793,14797,14819,14823],{"__ignoreMap":23},[1291,14415,14416,14418,14420,14422,14424,14426,14428,14430,14432,14434],{"class":3427,"line":3428},[1291,14417,4068],{"class":3431},[1291,14419,3738],{"class":3435},[1291,14421,4073],{"class":3431},[1291,14423,694],{"class":3435},[1291,14425,4078],{"class":3457},[1291,14427,694],{"class":3435},[1291,14429,4083],{"class":3457},[1291,14431,694],{"class":3435},[1291,14433,4088],{"class":3812},[1291,14435,3874],{"class":3435},[1291,14437,14438,14440,14442,14444],{"class":3427,"line":24},[1291,14439,4095],{"class":3819},[1291,14441,3738],{"class":3435},[1291,14443,9745],{"class":3812},[1291,14445,4107],{"class":3435},[1291,14447,14448,14450,14452,14454,14456,14458],{"class":3427,"line":675},[1291,14449,4112],{"class":3819},[1291,14451,3738],{"class":3435},[1291,14453,3691],{"class":3435},[1291,14455,4119],{"class":3439},[1291,14457,3691],{"class":3435},[1291,14459,4107],{"class":3435},[1291,14461,14462,14464],{"
class":3427,"line":3542},[1291,14463,4128],{"class":3819},[1291,14465,4131],{"class":3435},[1291,14467,14468],{"class":3427,"line":3547},[1291,14469,3827],{"class":3435},[1291,14471,14472],{"class":3427,"line":3572},[1291,14473,3526],{"emptyLinePlaceholder":35},[1291,14475,14476,14478,14480,14482,14484],{"class":3427,"line":3614},[1291,14477,4140],{"class":3431},[1291,14479,3738],{"class":3435},[1291,14481,4145],{"class":3435},[1291,14483,4148],{"class":3431},[1291,14485,5267],{"class":3435},[1291,14487,14488],{"class":3427,"line":3640},[1291,14489,3526],{"emptyLinePlaceholder":35},[1291,14491,14492,14494,14496,14498,14500,14502],{"class":3427,"line":3665},[1291,14493,4522],{"class":3431},[1291,14495,3738],{"class":3435},[1291,14497,3601],{"class":3431},[1291,14499,694],{"class":3435},[1291,14501,10357],{"class":3812},[1291,14503,4871],{"class":3435},[1291,14505,14506],{"class":3427,"line":3670},[1291,14507,13884],{"class":3673},[1291,14509,14510,14512,14514],{"class":3427,"line":3677},[1291,14511,5370],{"class":3431},[1291,14513,3738],{"class":3435},[1291,14515,13280],{"class":3435},[1291,14517,14518],{"class":3427,"line":3877},[1291,14519,3526],{"emptyLinePlaceholder":35},[1291,14521,14522,14524,14526,14528,14530,14533],{"class":3427,"line":3916},[1291,14523,4292],{"class":3431},[1291,14525,3738],{"class":3435},[1291,14527,3591],{"class":3431},[1291,14529,694],{"class":3435},[1291,14531,14532],{"class":3812},"SentenceTransformerEmbedder",[1291,14534,3874],{"class":3435},[1291,14536,14537,14539,14541,14543,14546,14548],{"class":3427,"line":4519},[1291,14538,4195],{"class":3819},[1291,14540,3738],{"class":3435},[1291,14542,3691],{"class":3435},[1291,14544,14545],{"class":3439},"Alibaba-NLP\u002Fgte-large-en-v1.5",[1291,14547,3691],{"class":3435},[1291,14549,4107],{"class":3435},[1291,14551,14552,14555,14558,14560,14563,14565,14567],{"class":3427,"line":6038},[1291,14553,14554],{"class":3819},"    
call_kwargs",[1291,14556,14557],{"class":3435},"={",[1291,14559,3691],{"class":3435},[1291,14561,14562],{"class":3439},"show_progress_bar",[1291,14564,3691],{"class":3435},[1291,14566,4390],{"class":3435},[1291,14568,14569],{"class":3435}," False},\n",[1291,14571,14572,14575],{"class":3427,"line":6043},[1291,14573,14574],{"class":3819},"    trust_remote_code",[1291,14576,4131],{"class":3435},[1291,14578,14579],{"class":3427,"line":6066},[1291,14580,3827],{"class":3435},[1291,14582,14583],{"class":3427,"line":6078},[1291,14584,3526],{"emptyLinePlaceholder":35},[1291,14586,14587,14589,14591,14593,14595,14597,14599,14601,14603,14605,14607,14609,14611,14613,14615,14617],{"class":3427,"line":6089},[1291,14588,3586],{"class":3431},[1291,14590,3738],{"class":3435},[1291,14592,3596],{"class":3431},[1291,14594,694],{"class":3435},[1291,14596,10920],{"class":3812},[1291,14598,3816],{"class":3435},[1291,14600,10049],{"class":3819},[1291,14602,3738],{"class":3435},[1291,14604,3691],{"class":3435},[1291,14606,7849],{"class":3439},[1291,14608,3691],{"class":3435},[1291,14610,3566],{"class":3435},[1291,14612,10937],{"class":3819},[1291,14614,3738],{"class":3435},[1291,14616,10855],{"class":3812},[1291,14618,6237],{"class":3435},[1291,14620,14621],{"class":3427,"line":6124},[1291,14622,3526],{"emptyLinePlaceholder":35},[1291,14624,14625,14627,14629,14631],{"class":3427,"line":6133},[1291,14626,10614],{"class":3431},[1291,14628,3738],{"class":3435},[1291,14630,10956],{"class":3812},[1291,14632,3874],{"class":3435},[1291,14634,14635,14637,14639,14641],{"class":3427,"line":6141},[1291,14636,10164],{"class":3819},[1291,14638,3738],{"class":3435},[1291,14640,4585],{"class":3812},[1291,14642,4107],{"class":3435},[1291,14644,14645,14647,14649,14651],{"class":3427,"line":6151},[1291,14646,4616],{"class":3819},[1291,14648,3738],{"class":3435},[1291,14650,4621],{"class":3812},[1291,14652,4107],{"class":3435},[1291,14654,14655,14657,14659,14661],{"class":3427,"line":6923},[1291,14656,4604],{"
class":3819},[1291,14658,3738],{"class":3435},[1291,14660,10983],{"class":3812},[1291,14662,4107],{"class":3435},[1291,14664,14665,14667,14669,14671,14673,14675,14677,14679],{"class":3427,"line":6928},[1291,14666,14027],{"class":3819},[1291,14668,3738],{"class":3435},[1291,14670,14032],{"class":3812},[1291,14672,3816],{"class":3435},[1291,14674,4597],{"class":3819},[1291,14676,3738],{"class":3435},[1291,14678,4597],{"class":3812},[1291,14680,4242],{"class":3435},[1291,14682,14683],{"class":3427,"line":6934},[1291,14684,3827],{"class":3435},[1291,14686,14687],{"class":3427,"line":6940},[1291,14688,3526],{"emptyLinePlaceholder":35},[1291,14690,14691,14693,14695,14697,14699,14701,14703],{"class":3427,"line":6952},[1291,14692,11006],{"class":3431},[1291,14694,4390],{"class":3435},[1291,14696,9387],{"class":6356},[1291,14698,3702],{"class":3435},[1291,14700,11015],{"class":3435},[1291,14702,11018],{"class":3439},[1291,14704,11021],{"class":3435},[1291,14706,14707,14709],{"class":3427,"line":6984},[1291,14708,11026],{"class":3439},[1291,14710,11021],{"class":3435},[1291,14712,14713],{"class":3427,"line":7996},[1291,14714,11033],{"class":3439},[1291,14716,14717,14719],{"class":3427,"line":8007},[1291,14718,11038],{"class":3439},[1291,14720,11041],{"class":3451},[1291,14722,14723],{"class":3427,"line":8018},[1291,14724,3526],{"emptyLinePlaceholder":35},[1291,14726,14727,14729],{"class":3427,"line":8029},[1291,14728,11050],{"class":3439},[1291,14730,6019],{"class":3451},[1291,14732,14733],{"class":3427,"line":8040},[1291,14734,3526],{"emptyLinePlaceholder":35},[1291,14736,14737,14739],{"class":3427,"line":8051},[1291,14738,11061],{"class":3439},[1291,14740,6035],{"class":3435},[1291,14742,14743],{"class":3427,"line":8057},[1291,14744,3526],{"emptyLinePlaceholder":35},[1291,14746,14747,14749,14751,14753],{"class":3427,"line":8068},[1291,14748,11077],{"class":3431},[1291,14750,3738],{"class":3435},[1291,14752,4654],{"class":3812},[1291,14754,3874],{"class":3435},[1291,14756,14
757,14759,14761,14763],{"class":3427,"line":8079},[1291,14758,10105],{"class":3819},[1291,14760,3738],{"class":3435},[1291,14762,3627],{"class":3812},[1291,14764,4107],{"class":3435},[1291,14766,14767,14769,14771,14773],{"class":3427,"line":8090},[1291,14768,11098],{"class":3819},[1291,14770,3738],{"class":3435},[1291,14772,11103],{"class":3812},[1291,14774,4107],{"class":3435},[1291,14776,14777,14779,14781,14783],{"class":3427,"line":8101},[1291,14778,11110],{"class":3819},[1291,14780,3738],{"class":3435},[1291,14782,11006],{"class":3812},[1291,14784,4107],{"class":3435},[1291,14786,14787],{"class":3427,"line":8112},[1291,14788,3827],{"class":3435},[1291,14790,14791],{"class":3427,"line":8117},[1291,14792,3526],{"emptyLinePlaceholder":35},[1291,14794,14795],{"class":3427,"line":8128},[1291,14796,3526],{"emptyLinePlaceholder":35},[1291,14798,14799,14801,14803,14805,14807,14809,14811,14813,14815,14817],{"class":3427,"line":8139},[1291,14800,5536],{"class":3431},[1291,14802,3738],{"class":3435},[1291,14804,11226],{"class":3812},[1291,14806,3816],{"class":3435},[1291,14808,11169],{"class":3812},[1291,14810,3566],{"class":3435},[1291,14812,11235],{"class":3812},[1291,14814,3566],{"class":3435},[1291,14816,11240],{"class":3812},[1291,14818,3827],{"class":3435},[1291,14820,14821],{"class":3427,"line":8150},[1291,14822,3526],{"emptyLinePlaceholder":35},[1291,14824,14825,14827,14829,14831,14833,14835,14837,14839,14841,14843,14845,14847,14849,14851,14853,14855,14857,14859],{"class":3427,"line":8156},[1291,14826,11251],{"class":3431},[1291,14828,3738],{"class":3435},[1291,14830,11256],{"class":3431},[1291,14832,694],{"class":3435},[1291,14834,11261],{"class":3812},[1291,14836,3816],{"class":3435},[1291,14838,4813],{"class":3819},[1291,14840,3738],{"class":3435},[1291,14842,5580],{"class":3812},[1291,14844,694],{"class":3435},[1291,14846,11274],{"class":3457},[1291,14848,3566],{"class":3435},[1291,14850,11279],{"class":3819},[1291,14852,3738],{"class":3435},[1291,14854,11284],
{"class":6356},[1291,14856,3816],{"class":3435},[1291,14858,11289],{"class":3819},[1291,14860,11292],{"class":3435},[3418,14862,14863],{"className":3420,"code":11298,"language":3422,"meta":23,"style":23},[3061,14864,14865],{"__ignoreMap":23},[1291,14866,14867,14869,14871,14873],{"class":3427,"line":3428},[1291,14868,11305],{"class":3431},[1291,14870,694],{"class":3435},[1291,14872,4868],{"class":3812},[1291,14874,4871],{"class":3435},[3418,14876,14878],{"className":3420,"code":14877,"language":3422,"meta":23,"style":23},"predicted_dataset_gtembedder = predict_test_dataset(dataset)\nragas_evals_dataset_gtembedder = run_ragas_evaluations(predicted_dataset_gtembedder)\n",[3061,14879,14880,14895],{"__ignoreMap":23},[1291,14881,14882,14885,14887,14889,14891,14893],{"class":3427,"line":3428},[1291,14883,14884],{"class":3431},"predicted_dataset_gtembedder ",[1291,14886,3738],{"class":3435},[1291,14888,11401],{"class":3812},[1291,14890,3816],{"class":3435},[1291,14892,9256],{"class":3812},[1291,14894,3827],{"class":3435},[1291,14896,14897,14900,14902,14904,14906,14909],{"class":3427,"line":24},[1291,14898,14899],{"class":3431},"ragas_evals_dataset_gtembedder ",[1291,14901,3738],{"class":3435},[1291,14903,11937],{"class":3812},[1291,14905,3816],{"class":3435},[1291,14907,14908],{"class":3812},"predicted_dataset_gtembedder",[1291,14910,3827],{"class":3435},[3418,14912,14914],{"className":3420,"code":14913,"language":3422,"meta":23,"style":23},"ragas_evals_dataset_gtembedder\n",[3061,14915,14916],{"__ignoreMap":23},[1291,14917,14918],{"class":3427,"line":3428},[1291,14919,14913],{"class":3431},[3418,14921,14924],{"className":14922,"code":14923,"language":4999},[4997],"{'answer_correctness': 0.641, 'faithfulness': 0.5327, 'context_recall': 0.9357, 'context_precision': 0.8268}\n",[3061,14925,14923],{"__ignoreMap":23},[73,14927,14928],{},"Hmm, seems like this embedder cannot quite work as well as the previous one. 
Weirdly, faithfulness score dropped significantly, maybe the ordering of the chunks is the reason.",[73,14930,14931],{},"Let's see if we can improve the performance with the prompt.",[3418,14933,14934],{"className":3420,"code":13119,"language":3422,"meta":23,"style":23},[3061,14935,14936,14940,14944,14954,14964,14968],{"__ignoreMap":23},[1291,14937,14938],{"class":3427,"line":3428},[1291,14939,13126],{"class":3673},[1291,14941,14942],{"class":3427,"line":24},[1291,14943,3526],{"emptyLinePlaceholder":35},[1291,14945,14946,14948,14950,14952],{"class":3427,"line":675},[1291,14947,11305],{"class":3431},[1291,14949,694],{"class":3435},[1291,14951,12364],{"class":3812},[1291,14953,4871],{"class":3435},[1291,14955,14956,14958,14960,14962],{"class":3427,"line":3542},[1291,14957,11305],{"class":3431},[1291,14959,694],{"class":3435},[1291,14961,9544],{"class":3812},[1291,14963,4871],{"class":3435},[1291,14965,14966],{"class":3427,"line":3547},[1291,14967,3526],{"emptyLinePlaceholder":35},[1291,14969,14970,14972],{"class":3427,"line":3572},[1291,14971,12449],{"class":3812},[1291,14973,4871],{"class":3435},[3189,14975,9076],{"id":14976},"changing-the-prompt",[73,14978,14979],{},"Prompt is one of the more important aspects of a good RAG solution. 
Although we will aim for a prompt that will work in general, you may want to modify your prompt in consideration with your users' expectations, business goals, domain knowledge or other variables.",[73,14981,14982],{},"Now, let's keep the same embedder as above and change the prompt to be a bit more compute intensive.",[3418,14984,14986],{"className":3420,"code":14985,"language":3422,"meta":23,"style":23},"folder = pw.io.fs.read(\n    path=INPUT_FOLDER,\n    format=\"binary\",\n    with_metadata=True,\n)\n\nsources = [folder]\n\nparser = parsers.PypdfParser()\n# pypdf parser splits documents by the page, so we don't need another splitter\ntext_splitter = None\n\nembedder = embedders.SentenceTransformerEmbedder(\n    model=\"Alibaba-NLP\u002Fgte-large-en-v1.5\",\n    call_kwargs={\"show_progress_bar\": False},\n    trust_remote_code=True,\n)\n\nllm = llms.OpenAIChat(model=\"gpt-4o\", cache_strategy=DiskCache())\n\ndocument_store = DocumentStore(\n    docs=sources,\n    parser=parser,\n    splitter=text_splitter,\n    retriever_factory=BruteForceKnnFactory(embedder=embedder),\n)\n\nprompt_template: str = \"\"\"You are an assistant for question-answering tasks. \\\nUse the following pieces of retrieved context to answer the question. \\\n\nBefore answering the question, first think about and list the relevant parts from the given context. 
\\\nThen, answer the question based on the facts you have listed.\n\nAlways structure your responses in the following format:\nRelevant contexts: [Write the relevant parts of the context for given question]\nAnswer: [Detailed response to the user's question that is grounded by the facts you listed]\n\nIf you don't know the answer, just say that you don't know.\n\n\nQuestion: {query}\n\nContext: {context}\n\nAnswer:\"\"\"\n\nrag_app = BaseRAGQuestionAnswerer(\n    llm=llm,\n    indexer=document_store,\n    prompt_template=prompt_template,\n)\n\n\nserver = QASummaryRestServer(pathway_host, pathway_port, rag_app)\n\nserver_process = multiprocessing.Process(target=server.run, kwargs=dict(threaded=False))\n",[3061,14987,14988,15010,15020,15034,15040,15044,15048,15060,15064,15078,15082,15090,15094,15108,15122,15138,15144,15148,15152,15186,15190,15200,15210,15220,15230,15248,15252,15256,15272,15278,15282,15289,15294,15298,15303,15308,15313,15317,15321,15325,15329,15336,15340,15347,15351,15358,15362,15372,15382,15392,15402,15406,15410,15414,15436,15440],{"__ignoreMap":23},[1291,14989,14990,14992,14994,14996,14998,15000,15002,15004,15006,15008],{"class":3427,"line":3428},[1291,14991,4068],{"class":3431},[1291,14993,3738],{"class":3435},[1291,14995,4073],{"class":3431},[1291,14997,694],{"class":3435},[1291,14999,4078],{"class":3457},[1291,15001,694],{"class":3435},[1291,15003,4083],{"class":3457},[1291,15005,694],{"class":3435},[1291,15007,4088],{"class":3812},[1291,15009,3874],{"class":3435},[1291,15011,15012,15014,15016,15018],{"class":3427,"line":24},[1291,15013,4095],{"class":3819},[1291,15015,3738],{"class":3435},[1291,15017,9745],{"class":3812},[1291,15019,4107],{"class":3435},[1291,15021,15022,15024,15026,15028,15030,15032],{"class":3427,"line":675},[1291,15023,4112],{"class":3819},[1291,15025,3738],{"class":3435},[1291,15027,3691],{"class":3435},[1291,15029,4119],{"class":3439},[1291,15031,3691],{"class":3435},[1291,15033,4107],{"class":3435},[1291,15035,15036,15038],
{"class":3427,"line":3542},[1291,15037,4128],{"class":3819},[1291,15039,4131],{"class":3435},[1291,15041,15042],{"class":3427,"line":3547},[1291,15043,3827],{"class":3435},[1291,15045,15046],{"class":3427,"line":3572},[1291,15047,3526],{"emptyLinePlaceholder":35},[1291,15049,15050,15052,15054,15056,15058],{"class":3427,"line":3614},[1291,15051,4140],{"class":3431},[1291,15053,3738],{"class":3435},[1291,15055,4145],{"class":3435},[1291,15057,4148],{"class":3431},[1291,15059,5267],{"class":3435},[1291,15061,15062],{"class":3427,"line":3640},[1291,15063,3526],{"emptyLinePlaceholder":35},[1291,15065,15066,15068,15070,15072,15074,15076],{"class":3427,"line":3665},[1291,15067,4522],{"class":3431},[1291,15069,3738],{"class":3435},[1291,15071,3601],{"class":3431},[1291,15073,694],{"class":3435},[1291,15075,10357],{"class":3812},[1291,15077,4871],{"class":3435},[1291,15079,15080],{"class":3427,"line":3670},[1291,15081,13884],{"class":3673},[1291,15083,15084,15086,15088],{"class":3427,"line":3677},[1291,15085,5370],{"class":3431},[1291,15087,3738],{"class":3435},[1291,15089,13280],{"class":3435},[1291,15091,15092],{"class":3427,"line":3877},[1291,15093,3526],{"emptyLinePlaceholder":35},[1291,15095,15096,15098,15100,15102,15104,15106],{"class":3427,"line":3916},[1291,15097,4292],{"class":3431},[1291,15099,3738],{"class":3435},[1291,15101,3591],{"class":3431},[1291,15103,694],{"class":3435},[1291,15105,14532],{"class":3812},[1291,15107,3874],{"class":3435},[1291,15109,15110,15112,15114,15116,15118,15120],{"class":3427,"line":4519},[1291,15111,4195],{"class":3819},[1291,15113,3738],{"class":3435},[1291,15115,3691],{"class":3435},[1291,15117,14545],{"class":3439},[1291,15119,3691],{"class":3435},[1291,15121,4107],{"class":3435},[1291,15123,15124,15126,15128,15130,15132,15134,15136],{"class":3427,"line":6038},[1291,15125,14554],{"class":3819},[1291,15127,14557],{"class":3435},[1291,15129,3691],{"class":3435},[1291,15131,14562],{"class":3439},[1291,15133,3691],{"class":3435},[1291,
15135,4390],{"class":3435},[1291,15137,14569],{"class":3435},[1291,15139,15140,15142],{"class":3427,"line":6043},[1291,15141,14574],{"class":3819},[1291,15143,4131],{"class":3435},[1291,15145,15146],{"class":3427,"line":6066},[1291,15147,3827],{"class":3435},[1291,15149,15150],{"class":3427,"line":6078},[1291,15151,3526],{"emptyLinePlaceholder":35},[1291,15153,15154,15156,15158,15160,15162,15164,15166,15168,15170,15172,15174,15176,15178,15180,15182,15184],{"class":3427,"line":6089},[1291,15155,3586],{"class":3431},[1291,15157,3738],{"class":3435},[1291,15159,3596],{"class":3431},[1291,15161,694],{"class":3435},[1291,15163,10920],{"class":3812},[1291,15165,3816],{"class":3435},[1291,15167,10049],{"class":3819},[1291,15169,3738],{"class":3435},[1291,15171,3691],{"class":3435},[1291,15173,7849],{"class":3439},[1291,15175,3691],{"class":3435},[1291,15177,3566],{"class":3435},[1291,15179,10937],{"class":3819},[1291,15181,3738],{"class":3435},[1291,15183,10855],{"class":3812},[1291,15185,6237],{"class":3435},[1291,15187,15188],{"class":3427,"line":6124},[1291,15189,3526],{"emptyLinePlaceholder":35},[1291,15191,15192,15194,15196,15198],{"class":3427,"line":6133},[1291,15193,10614],{"class":3431},[1291,15195,3738],{"class":3435},[1291,15197,10956],{"class":3812},[1291,15199,3874],{"class":3435},[1291,15201,15202,15204,15206,15208],{"class":3427,"line":6141},[1291,15203,10164],{"class":3819},[1291,15205,3738],{"class":3435},[1291,15207,4585],{"class":3812},[1291,15209,4107],{"class":3435},[1291,15211,15212,15214,15216,15218],{"class":3427,"line":6151},[1291,15213,4616],{"class":3819},[1291,15215,3738],{"class":3435},[1291,15217,4621],{"class":3812},[1291,15219,4107],{"class":3435},[1291,15221,15222,15224,15226,15228],{"class":3427,"line":6923},[1291,15223,4604],{"class":3819},[1291,15225,3738],{"class":3435},[1291,15227,10983],{"class":3812},[1291,15229,4107],{"class":3435},[1291,15231,15232,15234,15236,15238,15240,15242,15244,15246],{"class":3427,"line":6928},[1291,15233,14
027],{"class":3819},[1291,15235,3738],{"class":3435},[1291,15237,14032],{"class":3812},[1291,15239,3816],{"class":3435},[1291,15241,4597],{"class":3819},[1291,15243,3738],{"class":3435},[1291,15245,4597],{"class":3812},[1291,15247,4242],{"class":3435},[1291,15249,15250],{"class":3427,"line":6934},[1291,15251,3827],{"class":3435},[1291,15253,15254],{"class":3427,"line":6940},[1291,15255,3526],{"emptyLinePlaceholder":35},[1291,15257,15258,15260,15262,15264,15266,15268,15270],{"class":3427,"line":6952},[1291,15259,11006],{"class":3431},[1291,15261,4390],{"class":3435},[1291,15263,9387],{"class":6356},[1291,15265,3702],{"class":3435},[1291,15267,11015],{"class":3435},[1291,15269,11018],{"class":3439},[1291,15271,11021],{"class":3435},[1291,15273,15274,15276],{"class":3427,"line":6984},[1291,15275,11026],{"class":3439},[1291,15277,11021],{"class":3435},[1291,15279,15280],{"class":3427,"line":7996},[1291,15281,3526],{"emptyLinePlaceholder":35},[1291,15283,15284,15287],{"class":3427,"line":8007},[1291,15285,15286],{"class":3439},"Before answering the question, first think about and list the relevant parts from the given context. 
",[1291,15288,11021],{"class":3435},[1291,15290,15291],{"class":3427,"line":8018},[1291,15292,15293],{"class":3439},"Then, answer the question based on the facts you have listed.\n",[1291,15295,15296],{"class":3427,"line":8029},[1291,15297,3526],{"emptyLinePlaceholder":35},[1291,15299,15300],{"class":3427,"line":8040},[1291,15301,15302],{"class":3439},"Always structure your responses in the following format:\n",[1291,15304,15305],{"class":3427,"line":8051},[1291,15306,15307],{"class":3439},"Relevant contexts: [Write the relevant parts of the context for given question]\n",[1291,15309,15310],{"class":3427,"line":8057},[1291,15311,15312],{"class":3439},"Answer: [Detailed response to the user's question that is grounded by the facts you listed]\n",[1291,15314,15315],{"class":3427,"line":8068},[1291,15316,3526],{"emptyLinePlaceholder":35},[1291,15318,15319],{"class":3427,"line":8079},[1291,15320,11033],{"class":3439},[1291,15322,15323],{"class":3427,"line":8090},[1291,15324,3526],{"emptyLinePlaceholder":35},[1291,15326,15327],{"class":3427,"line":8101},[1291,15328,3526],{"emptyLinePlaceholder":35},[1291,15330,15331,15334],{"class":3427,"line":8112},[1291,15332,15333],{"class":3439},"Question: ",[1291,15335,11041],{"class":3451},[1291,15337,15338],{"class":3427,"line":8117},[1291,15339,3526],{"emptyLinePlaceholder":35},[1291,15341,15342,15345],{"class":3427,"line":8128},[1291,15343,15344],{"class":3439},"Context: 
",[1291,15346,6019],{"class":3451},[1291,15348,15349],{"class":3427,"line":8139},[1291,15350,3526],{"emptyLinePlaceholder":35},[1291,15352,15353,15356],{"class":3427,"line":8150},[1291,15354,15355],{"class":3439},"Answer:",[1291,15357,6035],{"class":3435},[1291,15359,15360],{"class":3427,"line":8156},[1291,15361,3526],{"emptyLinePlaceholder":35},[1291,15363,15364,15366,15368,15370],{"class":3427,"line":8162},[1291,15365,11077],{"class":3431},[1291,15367,3738],{"class":3435},[1291,15369,4654],{"class":3812},[1291,15371,3874],{"class":3435},[1291,15373,15374,15376,15378,15380],{"class":3427,"line":8168},[1291,15375,10105],{"class":3819},[1291,15377,3738],{"class":3435},[1291,15379,3627],{"class":3812},[1291,15381,4107],{"class":3435},[1291,15383,15384,15386,15388,15390],{"class":3427,"line":8174},[1291,15385,11098],{"class":3819},[1291,15387,3738],{"class":3435},[1291,15389,11103],{"class":3812},[1291,15391,4107],{"class":3435},[1291,15393,15394,15396,15398,15400],{"class":3427,"line":8180},[1291,15395,11110],{"class":3819},[1291,15397,3738],{"class":3435},[1291,15399,11006],{"class":3812},[1291,15401,4107],{"class":3435},[1291,15403,15404],{"class":3427,"line":8186},[1291,15405,3827],{"class":3435},[1291,15407,15408],{"class":3427,"line":8191},[1291,15409,3526],{"emptyLinePlaceholder":35},[1291,15411,15412],{"class":3427,"line":8197},[1291,15413,3526],{"emptyLinePlaceholder":35},[1291,15415,15416,15418,15420,15422,15424,15426,15428,15430,15432,15434],{"class":3427,"line":8203},[1291,15417,5536],{"class":3431},[1291,15419,3738],{"class":3435},[1291,15421,11226],{"class":3812},[1291,15423,3816],{"class":3435},[1291,15425,11169],{"class":3812},[1291,15427,3566],{"class":3435},[1291,15429,11235],{"class":3812},[1291,15431,3566],{"class":3435},[1291,15433,11240],{"class":3812},[1291,15435,3827],{"class":3435},[1291,15437,15438],{"class":3427,"line":8209},[1291,15439,3526],{"emptyLinePlaceholder":35},[1291,15441,15442,15444,15446,15448,15450,15452,15454,15456,15458,15460,1
5462,15464,15466,15468,15470,15472,15474,15476],{"class":3427,"line":8214},[1291,15443,11251],{"class":3431},[1291,15445,3738],{"class":3435},[1291,15447,11256],{"class":3431},[1291,15449,694],{"class":3435},[1291,15451,11261],{"class":3812},[1291,15453,3816],{"class":3435},[1291,15455,4813],{"class":3819},[1291,15457,3738],{"class":3435},[1291,15459,5580],{"class":3812},[1291,15461,694],{"class":3435},[1291,15463,11274],{"class":3457},[1291,15465,3566],{"class":3435},[1291,15467,11279],{"class":3819},[1291,15469,3738],{"class":3435},[1291,15471,11284],{"class":6356},[1291,15473,3816],{"class":3435},[1291,15475,11289],{"class":3819},[1291,15477,11292],{"class":3435},[3418,15479,15480],{"className":3420,"code":11298,"language":3422,"meta":23,"style":23},[3061,15481,15482],{"__ignoreMap":23},[1291,15483,15484,15486,15488,15490],{"class":3427,"line":3428},[1291,15485,11305],{"class":3431},[1291,15487,694],{"class":3435},[1291,15489,4868],{"class":3812},[1291,15491,4871],{"class":3435},[3418,15493,15495],{"className":3420,"code":15494,"language":3422,"meta":23,"style":23},"predicted_dataset_gtembedder_semantic = predict_test_dataset(dataset)\nragas_evals_dataset_gtembedder_semantic = run_ragas_evaluations(\n    predicted_dataset_gtembedder_semantic\n)\n",[3061,15496,15497,15512,15523,15528],{"__ignoreMap":23},[1291,15498,15499,15502,15504,15506,15508,15510],{"class":3427,"line":3428},[1291,15500,15501],{"class":3431},"predicted_dataset_gtembedder_semantic ",[1291,15503,3738],{"class":3435},[1291,15505,11401],{"class":3812},[1291,15507,3816],{"class":3435},[1291,15509,9256],{"class":3812},[1291,15511,3827],{"class":3435},[1291,15513,15514,15517,15519,15521],{"class":3427,"line":24},[1291,15515,15516],{"class":3431},"ragas_evals_dataset_gtembedder_semantic ",[1291,15518,3738],{"class":3435},[1291,15520,11937],{"class":3812},[1291,15522,3874],{"class":3435},[1291,15524,15525],{"class":3427,"line":675},[1291,15526,15527],{"class":3812},"    
predicted_dataset_gtembedder_semantic\n",[1291,15529,15530],{"class":3427,"line":3542},[1291,15531,3827],{"class":3435},[3418,15533,15535],{"className":3420,"code":15534,"language":3422,"meta":23,"style":23},"ragas_evals_dataset_gtembedder_semantic\n",[3061,15536,15537],{"__ignoreMap":23},[1291,15538,15539],{"class":3427,"line":3428},[1291,15540,15534],{"class":3431},[3418,15542,15545],{"className":15543,"code":15544,"language":4999},[4997],"{'answer_correctness': 0.7448, 'faithfulness': 0.8011, 'context_recall': 0.9471, 'context_precision': 0.8153}\n",[3061,15546,15544],{"__ignoreMap":23},[73,15548,15549],{},"We can see some clear improvements in terms of correctness. As expected, retrieval metrics remained unchanged from the previous runs (except the one with different embedder).",[3418,15551,15552],{"className":3420,"code":13119,"language":3422,"meta":23,"style":23},[3061,15553,15554,15558,15562,15572,15582,15586],{"__ignoreMap":23},[1291,15555,15556],{"class":3427,"line":3428},[1291,15557,13126],{"class":3673},[1291,15559,15560],{"class":3427,"line":24},[1291,15561,3526],{"emptyLinePlaceholder":35},[1291,15563,15564,15566,15568,15570],{"class":3427,"line":675},[1291,15565,11305],{"class":3431},[1291,15567,694],{"class":3435},[1291,15569,12364],{"class":3812},[1291,15571,4871],{"class":3435},[1291,15573,15574,15576,15578,15580],{"class":3427,"line":3542},[1291,15575,11305],{"class":3431},[1291,15577,694],{"class":3435},[1291,15579,9544],{"class":3812},[1291,15581,4871],{"class":3435},[1291,15583,15584],{"class":3427,"line":3547},[1291,15585,3526],{"emptyLinePlaceholder":35},[1291,15587,15588,15590],{"class":3427,"line":3572},[1291,15589,12449],{"class":3812},[1291,15591,4871],{"class":3435},[140,15593,9082],{"id":15594},"summary-findings",[73,15596,15597],{},"There is no \"one size fits all\" logic when it comes to RAG. 
You need to find what suits you best and start working from there.",[73,15599,15600],{},"Pathway allows you to build RAG applications that are always live & up-to-date and available. Whether you are building a financial analysis tool for yourself or an internal application for lawyers, you need to think about how to update and refresh your knowledge base. With the help of dynamic connectors, you can focus on problems that matter.",[73,15602,15603],{},"We also learned that we can improve our performance by using a better parsing strategy, increasing the number of retrieved chunks, or introducing hybrid retrieval rather than a purely semantic search strategy.",[73,15605,15606],{},"However, we have only explored a single RAG paradigm that only consisted of simple retrieval and generation. There are many more ways that are left to explore!\nFor instance, knowledge-graphs can help the LLM to have more relevant context, or agent-driven architectures can unlock new search & retrieval capabilities that can adapt, re-try or reason before taking actions. 
Pathway can help you build such applications as well.",[73,15608,15609,15610,15615],{},"If you are interested in agents, you may explore our ",[77,15611,15614],{"href":15612,"rel":15613},"https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fllm-app\u002Ftree\u002Fmain\u002Fcookbooks\u002Fself-rag-agents",[81],"LangGraph RAG agents with Pathway cookbook"," and stay tuned for more content!",[73,15617,15618],{},"If you are interested in diving deeper into the topic, here are some good references to get started:",[145,15620,15621,15627,15633,15639],{},[148,15622,15623],{},[77,15624,15625],{"href":15625,"rel":15626},"https:\u002F\u002Fpathway.com\u002Fdevelopers\u002Ftemplates",[81],[148,15628,15629],{},[77,15630,15631],{"href":15631,"rel":15632},"https:\u002F\u002Fdocs.ragas.io\u002Fen\u002Flatest\u002Fconcepts\u002Fmetrics\u002Foverview\u002F#different-types-of-metrics",[81],[148,15634,15635],{},[77,15636,15637],{"href":15637,"rel":15638},"https:\u002F\u002Fgithub.com\u002Fbeir-cellar\u002Fbeir",[81],[148,15640,15641],{},[77,15642,15643],{"href":15643,"rel":15644},"https:\u002F\u002Fhuggingface.co\u002Fspaces\u002Fmteb\u002Fleaderboard",[81],[5019,15646,15647],{},"html pre.shiki code .s0W1g, html code.shiki .s0W1g{--shiki-default:#BABED8}html pre.shiki code .sAklC, html code.shiki .sAklC{--shiki-default:#89DDFF}html pre.shiki code .sfyAc, html code.shiki .sfyAc{--shiki-default:#C3E88D}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html pre.shiki code .s6cf3, html code.shiki .s6cf3{--shiki-default:#89DDFF;--shiki-default-font-style:italic}html pre.shiki code 
.sJ14y, html code.shiki .sJ14y{--shiki-default:#C792EA}html pre.shiki code .sdLwU, html code.shiki .sdLwU{--shiki-default:#82AAFF}html pre.shiki code .s7ZW3, html code.shiki .s7ZW3{--shiki-default:#BABED8;--shiki-default-font-style:italic}html pre.shiki code .s5Dmg, html code.shiki .s5Dmg{--shiki-default:#FFCB6B}html pre.shiki code .s-wAU, html code.shiki .s-wAU{--shiki-default:#F07178}html pre.shiki code .sx098, html code.shiki .sx098{--shiki-default:#F78C6C}html pre.shiki code .saEQR, html code.shiki .saEQR{--shiki-default:#676E95;--shiki-default-font-style:italic}",{"title":23,"searchDepth":24,"depth":24,"links":15649},[15650,15651,15652,15653,15654,15658,15659,15660,15661,15668],{"id":8999,"depth":24,"text":9000},{"id":9085,"depth":24,"text":9008},{"id":9178,"depth":24,"text":9014},{"id":9256,"depth":24,"text":9020},{"id":9276,"depth":24,"text":9026,"children":15655},[15656,15657],{"id":9279,"depth":675,"text":9280},{"id":9809,"depth":675,"text":9810},{"id":10375,"depth":24,"text":9032},{"id":11138,"depth":24,"text":11139},{"id":11382,"depth":24,"text":9038},{"id":12454,"depth":24,"text":12455,"children":15662},[15663,15664,15665,15666,15667],{"id":12472,"depth":675,"text":9052},{"id":13163,"depth":675,"text":13164},{"id":13783,"depth":675,"text":13784},{"id":14335,"depth":675,"text":9070},{"id":14976,"depth":675,"text":9076},{"id":15594,"depth":24,"text":9082},"Learn how to generate a synthetic RAG dataset and evaluate RAG applications for 
accuracy",{"layout":90,"date":15671,"thumbnail":15672,"tags":15674,"notebook_export_path":15675,"keywords":15676,"hidden":35},"2025-03-13",{"src":15673},"\u002Fassets\u002Fcontent\u002Fshowcases\u002Fevals\u002Frag-evals.png",[5047,3627,6268],"notebooks\u002Ftutorials\u002Frag-evaluations.ipynb",[5049,5050,5051,5052,15677,5056],"evaluations","\u002Fframework\u002Fblog\u002Fevaluating-rag",{"title":8941,"description":15669},{"loc":15678},"framework\u002Fblog\u002F1005.evaluating-rag","i3JNdJgRSHZiTQ5LcQ5i2JDNDuADDdg99uLDpb24XWo",{"id":15684,"title":15685,"author":15686,"body":15693,"description":17819,"extension":27,"meta":17820,"navigation":35,"path":17826,"seo":17827,"sitemap":17828,"stem":17829,"__hash__":17830},"content\u002Fframework\u002Fblog\u002F1010.chatgpt-python-api-real-time-data.md","How to use ChatGPT API in Python for your real-time data",{"id":15687,"url":15688,"name":15689,"description":15690,"img":15691,"provider":11,"linkedin":15692},"bobur","bobur-umurzokov","Bobur Umurzokov","Developer Advocate","\u002Fassets\u002Fauthors\u002Fbobur-umurzokov.jpg","https:\u002F\u002Fwww.linkedin.com\u002Fin\u002Fboburumurzokov\u002F",{"type":13,"value":15694,"toc":17796},[15695,15707,15710,15718,15729,15754,15768,15772,15775,15789,15793,15800,15814,15818,15835,15839,15866,15902,15906,15913,15916,15929,15952,15956,15959,16038,16042,16061,16065,16085,16090,16094,16103,16214,16223,16227,16248,16322,16329,16520,16524,16537,16605,16609,16621,16689,16693,16696,16717,16721,16724,16856,16860,16866,16905,16913,16956,16960,16963,17024,17028,17035,17474,17478,17489,17496,17529,17532,17729,17738,17742,17746,17749,17763,17767,17781,17785,17793],[73,15696,15697],{},[77,15698,15701],{"href":15699,"target":15700},"https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fllm-app#prerequisites","_parent",[597,15702],{"src":15703,"alt":15704,"className":15705},"https:\u002F\u002Fpathway.com\u002Fassets\u002Fgh-badge.svg","View in 
Github",[15706],"inline",[68,15708,15685],{"id":15709},"how-to-use-chatgpt-api-in-python-for-your-real-time-data",[73,15711,15712,15717],{},[77,15713,15716],{"href":15714,"rel":15715},"https:\u002F\u002Fopenai.com\u002Fgpt-4",[81],"OpenAI's GPT"," has emerged as the foremost AI tool globally and is proficient at addressing queries based on its training data. However, it cannot answer questions about unknown topics:",[145,15719,15720,15723,15726],{},[148,15721,15722],{},"Recent events after Sep 2021.",[148,15724,15725],{},"Your non-public documents.",[148,15727,15728],{},"Information from past conversations.",[73,15730,15731,15732,15735,15736,15739,15740,15742,15743,15748,15749,694],{},"This task gets even more complicated when you deal with ",[169,15733,15734],{},"real-time data"," that frequently changes. Moreover, you cannot feed extensive content to GPT, nor can it retain your data over extended periods. In this case, you need to ",[169,15737,15738],{},"build a custom LLM (Large Language Model)"," ",[169,15741,4012],{}," efficiently to give context to the answer process. This piece will walk you through the steps to develop such an application utilizing the open-source ",[77,15744,15747],{"href":15745,"rel":15746},"https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fllm-app",[81],"LLM App"," library in Python. The source code is provided as a showcase at ",[77,15750,15753],{"href":15751,"rel":15752},"https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fllm-app#showcases",[81],"GitHub",[7294,15755,15758,15763],{"href":15756,"icon":15757},"\u002Fframework\u002Fsolutions","heroicons:code-bracket-square-solid",[6061,15759,15760],{"v-slot:title":23},[73,15761,15762],{},"Realtime Document AI pipelines",[6061,15764,15765],{"v-slot:description":23},[73,15766,15767],{},"Get started with Pathway Live Data Framework Realtime Document AI pipelines with our step-by-step guide, from setup to live document sync. 
Explore built-in features like Similarity Search, Vector Index, and more!",[140,15769,15771],{"id":15770},"learning-objectives","Learning objectives",[73,15773,15774],{},"You will learn the following throughout the article:",[145,15776,15777,15780,15783,15786],{},[148,15778,15779],{},"The reason why you need to add custom data to ChatGPT.",[148,15781,15782],{},"How to use embeddings, prompt engineering, and ChatGPT for better question answering.",[148,15784,15785],{},"Build your own ChatGPT with custom data using LLM App.",[148,15787,15788],{},"Create a ChatGPT Python API for finding real-time discounts or sales prices.",[140,15790,15792],{"id":15791},"why-provide-chatgpt-with-a-custom-knowledge-base","Why provide ChatGPT with a custom knowledge base?",[73,15794,15795,15796,15799],{},"Before jumping into the ways to enhance ChatGPT, let's first explore the manual methods of doing so and identify their challenges. Typically, ChatGPT is expanded through ",[169,15797,15798],{},"prompt engineering",". Assume that you want to find real-time discounts\u002Fdeals\u002Fcoupons from various online markets.",[73,15801,15802,15803,15807,15808,15813],{},"For example, when you ask ChatGPT “",[15804,15805,15806],"em",{},"Can you find me discounts this week for Adidas men's shoes?","”, a standard response you can get from the ",[77,15809,15812],{"href":15810,"rel":15811},"https:\u002F\u002Fchat.openai.com\u002F",[81],"ChatGPT UI"," interface without having custom knowledge is:",[3098,15815],{"alt":15816,"src":15817},"ChatGPT not respond to real time discounts","\u002Fassets\u002Fcontent\u002Fblog\u002Fchatgpt-not-respond-to-discounts.gif",[73,15819,15820,15821,15824,15825,15828,15829,15834],{},"As evident, GPT ",[169,15822,15823],{},"offers general advice"," on locating discounts but ",[169,15826,15827],{},"lacks specificity"," regarding where or what type of discounts, among other details. 
Now to help the model, we supplement it with discount information from a trustworthy data source. You must engage with ChatGPT by adding the initial document content prior to posting the actual questions. We will collect this sample data from the ",[77,15830,15833],{"href":15831,"rel":15832},"https:\u002F\u002Fwww.rainforestapi.com\u002Fdocs\u002Fproduct-data-api\u002Foverview",[81],"Amazon products deal"," dataset and insert only a single JSON item we have into the prompt:",[3098,15836],{"alt":15837,"src":15838},"ChatGPT not respond to real time discounts with single JSON item","\u002Fassets\u002Fcontent\u002Fblog\u002Fchatgpt-not-respond-to-discounts-single-json-item.gif",[73,15840,15841,15842,15845,15846,15849,15850,15853,15854,15859,15860,15865],{},"As you can see, you get the expected output and this is quite simple to achieve since ChatGPT is context aware now. However, the issue with this method is that the model's ",[169,15843,15844],{},"context is restricted"," (gpt-4 maximum text length is 8,192 tokens). This strategy will quickly become problematic when input data is huge: you may expect ",[169,15847,15848],{},"thousands of items discovered"," in sales and you cannot provide this ",[169,15851,15852],{},"large amount of data as an input message",". Also, once you have collected your data, you may want to clean, format, and preprocess data to ensure data quality and relevancy. 
If you utilize the OpenAI ",[77,15855,15858],{"href":15856,"rel":15857},"https:\u002F\u002Fplatform.openai.com\u002Fdocs\u002Fapi-reference\u002Fchat",[81],"Chat Completion endpoint"," or build ",[77,15861,15864],{"href":15862,"rel":15863},"https:\u002F\u002Fopenai.com\u002Fblog\u002Fchatgpt-plugins",[81],"custom plugins for ChatGPT",", it introduces other problems as follows:",[145,15867,15868,15878,15884,15896],{},[148,15869,15870,15873,15874,15877],{},[169,15871,15872],{},"Cost"," - By providing more detailed information and examples, the model's performance might improve, though at a higher cost (for GPT-4 with an input of 10k tokens and an output of 200 tokens, the cost is $0.624 per prediction). Repeatedly ",[169,15875,15876],{},"sending identical requests"," can escalate costs unless a local cache system is utilized.",[148,15879,15880,15883],{},[169,15881,15882],{},"Latency"," - A challenge with utilizing ChatGPT APIs for production, like those from OpenAI, is their unpredictability. There is no guarantee regarding the provision of consistent service.",[148,15885,15886,15889,15890,15895],{},[169,15887,15888],{},"Security"," - When integrating custom plugins, every API endpoint must be specified in the ",[77,15891,15894],{"href":15892,"rel":15893},"https:\u002F\u002Fplatform.openai.com\u002Fdocs\u002Fplugins\u002Fgetting-started",[81],"OpenAPI spec"," for functionality. This means you're revealing your internal API setup to ChatGPT, a risk many enterprises are skeptical of.",[148,15897,15898,15901],{},[169,15899,15900],{},"Offline Evaluation"," - Conducting offline tests on code and data output or replicating the data flow locally is challenging for developers. 
This is because each request to the system may yield varying responses.",[140,15903,15905],{"id":15904},"using-embeddings-prompt-engineering-and-chatgpt-for-question-answering","Using embeddings, prompt engineering, and ChatGPT for question answering",[73,15907,15908,15909,15912],{},"A promising approach you find on the internet is utilizing LLMs to create embeddings and then constructing your applications using these embeddings, such as for search and ask systems. In other words, instead of querying ChatGPT using the ",[77,15910,15858],{"href":15856,"rel":15911},[81],", you would do the following query:",[73,15914,15915],{},"Given the following discounts data: {input_data} answer this query: {user_query}",[73,15917,15918,15919,15924,15925,15928],{},"The concept is straightforward. Rather than posting a question directly, the method first creates ",[77,15920,15923],{"href":15921,"rel":15922},"https:\u002F\u002Fplatform.openai.com\u002Fdocs\u002Fguides\u002Fembeddings",[81],"vector embeddings"," through OpenAI API for each input document (text, image, CSV, PDF, or other types of data), then indexes generated embeddings for fast retrieval and stores them into a ",[169,15926,15927],{},"vector database"," and leverages the user's question to search and obtain relevant documents from the vector database. These documents are then presented to ChatGPT along with the question as a prompt. With this added context, ChatGPT can respond as if it's been trained on the internal dataset.",[73,15930,15931,15932,15935,15936,15939,15940,15943,15944,15947,15948,15951],{},"On the other hand, if you use ",[77,15933,712],{"href":1128,"rel":15934},[81],"’s ",[77,15937,15747],{"href":15745,"rel":15938},[81],", you ",[169,15941,15942],{},"don’t need"," even any vector databases. 
It implements ",[169,15945,15946],{},"real-time in-memory data indexing"," directly reading data from any compatible storage, ",[169,15949,15950],{},"without having to query"," a vector document database that comes with costs like increased prep work, infrastructure, and complexity. Keeping source and vectors in sync is painful. Also, it is even harder if the underlying input data is changing over time and requires re-indexing.",[140,15953,15955],{"id":15954},"chatgpt-with-custom-data-using-llm-app","ChatGPT with custom data using LLM App",[73,15957,15958],{},"These simple steps below explain a data pipelining approach to building a ChatGPT app for your data with LLM App.",[665,15960,15961,16012,16025],{},[148,15962,15963,5243,15966],{},[169,15964,15965],{},"Prepare",[665,15967,15968,15989,15995,16006],{},[148,15969,15970,15973,15974,15978,15979,15983,15984,15988],{},[169,15971,15972],{},"Collect",": Your app reads the data from various data sources (CSV, JsonLines, SQL databases, Kafka, Redpanda, Debezium, and so on) in real-time when a ",[77,15975,15977],{"href":15976},"\u002Fdevelopers\u002Fuser-guide\u002Fintroduction\u002Fstreaming-and-static-modes#streaming-mode","streaming mode"," is enabled with Pathway Live Data Framework (or you can test data ingestion in ",[77,15980,15982],{"href":15981},"\u002Fdevelopers\u002Fuser-guide\u002Fintroduction\u002Fstreaming-and-static-modes#static-mode","static mode"," too). It also maps each data row into a structured document ",[77,15985,15987],{"href":15986},"\u002Fdevelopers\u002Fuser-guide\u002Fconnect\u002Fschema#schema-usage-in-pathway","schema"," for better managing large data sets.",[148,15990,15991,15994],{},[169,15992,15993],{},"Preprocess",": Optionally, you do easy data cleaning by removing duplicates, irrelevant information, and noisy data that could affect your responses' quality and extracting the data fields you need for further processing. 
Also, at this stage, you can mask or hide private data to avoid it being sent to ChatGPT.",[148,15996,15997,16000,16001,16005],{},[169,15998,15999],{},"Embed",": Each document is ",[77,16002,16004],{"href":15921,"rel":16003},[81],"embedded"," with the OpenAI API, and the resulting embedding is retrieved.",[148,16007,16008,16011],{},[169,16009,16010],{},"Indexing",": Constructs an index on the generated embeddings in real-time.",[148,16013,16014,16017],{},[169,16015,16016],{},"Search",[665,16018,16019,16022],{},[148,16020,16021],{},"Given a user question (say, from an API-friendly interface), generate an embedding for the query from the OpenAI API.",[148,16023,16024],{},"Using the embeddings, retrieve the vector index by relevance to the query on-the-fly.",[148,16026,16027,16030],{},[169,16028,16029],{},"Ask",[665,16031,16032,16035],{},[148,16033,16034],{},"Insert the question and the most relevant sections into a message to GPT",[148,16036,16037],{},"Return GPT's answer (chat completion endpoint)",[140,16039,16041],{"id":16040},"build-a-chatgpt-python-api-for-sales","Build a ChatGPT Python API for sales",[73,16043,16044,16045,16048,16049,16054,16055,16060],{},"Now that we have a clear picture of how the LLM App works from the previous section, you can follow the steps below to understand how to build a ",[169,16046,16047],{},"discount finder app",". The ",[77,16050,16053],{"href":16051,"rel":16052},"https:\u002F\u002Fgithub.com\u002Fpathway-labs\u002Fchatgpt-api-python-sales",[81],"project source code"," can be found on GitHub. 
If you want to quickly start using the app, you can skip this part and clone the repository and run the code sample by following the instructions in the ",[77,16056,16059],{"href":16057,"rel":16058},"https:\u002F\u002Fgithub.com\u002Fpathway-labs\u002Fchatgpt-api-python-sales\u002Fblob\u002Fmain\u002FREADME.md",[81],"README.md"," file there.",[3189,16062,16064],{"id":16063},"sample-project-objective","Sample project objective",[73,16066,16067,16068,16073,16074,3126,16079,16084],{},"Inspired by this article on ",[77,16069,16072],{"href":16070,"rel":16071},"https:\u002F\u002Fwww.pathway.com\u002Fblog\u002Fbuilding-enterprise-search-apis-with-llms-for-production",[81],"enterprise search",", our sample app should expose an HTTP REST API endpoint in Python to answer user queries about current sales by retrieving the latest deals from various sources (CSV, JSON Lines, API, message brokers, or databases) and leverage OpenAI API ",[77,16075,16078],{"href":16076,"rel":16077},"https:\u002F\u002Fplatform.openai.com\u002Fdocs\u002Fapi-reference\u002Fembeddings",[81],"Embeddings",[77,16080,16083],{"href":16081,"rel":16082},"https:\u002F\u002Fplatform.openai.com\u002Fdocs\u002Fapi-reference\u002Fcompletions",[81],"Chat Completion"," endpoints to generate AI assistant responses.",[1141,16086],{":zoomable":1143,"alt":16087,"sizes":16088,"src":16089},"Find discounts architectural diagram","md:1072px sm:576px xs:256px","\u002Fassets\u002Fcontent\u002Fblog\u002Ffind-discounts-app-architectural-diagram.png",[3189,16091,16093],{"id":16092},"step-1-data-collection-custom-data-ingestion","Step 1: Data collection (custom data ingestion)",[73,16095,16096,16097,16102],{},"For simplicity, we can use any CSV as a data source. 
The app takes CSV files like discounts.csv in the ",[77,16098,16101],{"href":16099,"rel":16100},"https:\u002F\u002Fgithub.com\u002Fpathway-labs\u002Fchatgpt-api-python-sales\u002Ftree\u002Fmain\u002Fexamples\u002Fcsv",[81],"CSV folder"," and uses this data when processing user queries. Here is an example of a CSV file with a single row:",[16104,16105,16106,16161],"table",{},[16107,16108,16109],"thead",{},[16110,16111,16112,16116,16119,16122,16125,16128,16131,16134,16137,16140,16143,16146,16149,16152,16155,16158],"tr",{},[16113,16114,16115],"th",{},"discount_until",[16113,16117,16118],{},"country",[16113,16120,16121],{},"city",[16113,16123,16124],{},"state",[16113,16126,16127],{},"postal_code",[16113,16129,16130],{},"region",[16113,16132,16133],{},"product_id",[16113,16135,16136],{},"category",[16113,16138,16139],{},"sub_category",[16113,16141,16142],{},"brand",[16113,16144,16145],{},"product_name",[16113,16147,16148],{},"currency",[16113,16150,16151],{},"actual_price",[16113,16153,16154],{},"discount_price",[16113,16156,16157],{},"discount_percentage",[16113,16159,16160],{},"address",[16162,16163,16164],"tbody",{},[16110,16165,16166,16170,16173,16176,16179,16182,16185,16188,16191,16194,16197,16200,16203,16206,16209,16211],{},[16167,16168,16169],"td",{},"2024-08-09",[16167,16171,16172],{},"USA",[16167,16174,16175],{},"Los Angeles",[16167,16177,16178],{},"IL",[16167,16180,16181],{},"22658",[16167,16183,16184],{},"Central",[16167,16186,16187],{},"7849",[16167,16189,16190],{},"Footwear",[16167,16192,16193],{},"Men Shoes",[16167,16195,16196],{},"Nike",[16167,16198,16199],{},"Formal Shoes",[16167,16201,16202],{},"USD",[16167,16204,16205],{},"130.67",[16167,16207,16208],{},"117.60",[16167,16210,6769],{},[16167,16212,16213],{},"321 Oak St",[73,16215,16216,16217,16222],{},"The cool part is, the app is always aware of changes in the CSV folder. If you add another CSV file, the LLM app does magic and automatically updates the AI model's response. 
",[77,16218,16221],{"href":16219,"rel":16220},"https:\u002F\u002Fgithub.com\u002Fpathway-labs\u002Fchatgpt-api-python-sales\u002Fblob\u002Fmain\u002Fexamples\u002Fcsv\u002Fdiscounts-data-generator.py",[81],"Discounts data generator"," Python script simulates real-time data coming from external data sources and generates\u002Fupdates existing discounts.csv file with random data. For example, you generate the second CSV discounts2.csv file under the data folder to test the app's reaction to real-time data changes.",[3189,16224,16226],{"id":16225},"step-2-data-loading-and-mapping","Step 2: Data loading and mapping",[73,16228,16229,16230,16234,16235,16238,16239,16243,16244,4390],{},"With the ",[77,16231,16233],{"href":16232},"\u002Fdevelopers\u002Fapi-docs\u002Fpathway-io\u002Fcsv","CSV input connector",", we will read the local CSV file, map data entries into a ",[77,16236,15987],{"href":16237},"\u002Fdevelopers\u002Fuser-guide\u002Fconnect\u002Fschema#data-types-and-schemas"," (if all CSV fields are known) and create a Pathway ",[77,16240,16242],{"href":16241},"\u002Fdevelopers\u002Fapi-docs\u002Fpathway-table","Table",". 
See the full source code in ",[77,16245,7585],{"href":16246,"rel":16247},"https:\u002F\u002Fgithub.com\u002Fpathway-labs\u002Fchatgpt-api-python-sales\u002Fblob\u002Fmain\u002Fexamples\u002Fapi\u002Fapp.py",[81],[3418,16249,16251],{"className":3420,"code":16250,"language":3422,"meta":23,"style":23},"...\n sales_data = pw.io.csv.read(\n        \".\u002Fexamples\u002Fcsv\u002Fdata\",\n        schema=CsvDiscountsInputSchema,\n        mode=\"streaming\"\n )\n",[3061,16252,16253,16258,16282,16293,16305,16317],{"__ignoreMap":23},[1291,16254,16255],{"class":3427,"line":3428},[1291,16256,16257],{"class":3431},"...\n",[1291,16259,16260,16263,16265,16267,16269,16271,16273,16276,16278,16280],{"class":3427,"line":24},[1291,16261,16262],{"class":3431}," sales_data ",[1291,16264,3738],{"class":3435},[1291,16266,4073],{"class":3431},[1291,16268,694],{"class":3435},[1291,16270,4078],{"class":3457},[1291,16272,694],{"class":3435},[1291,16274,16275],{"class":3457},"csv",[1291,16277,694],{"class":3435},[1291,16279,4088],{"class":3812},[1291,16281,3874],{"class":3435},[1291,16283,16284,16286,16289,16291],{"class":3427,"line":675},[1291,16285,6590],{"class":3435},[1291,16287,16288],{"class":3439},".\u002Fexamples\u002Fcsv\u002Fdata",[1291,16290,3691],{"class":3435},[1291,16292,4107],{"class":3435},[1291,16294,16295,16298,16300,16303],{"class":3427,"line":3542},[1291,16296,16297],{"class":3819},"        schema",[1291,16299,3738],{"class":3435},[1291,16301,16302],{"class":3812},"CsvDiscountsInputSchema",[1291,16304,4107],{"class":3435},[1291,16306,16307,16309,16311,16313,16315],{"class":3427,"line":3547},[1291,16308,6616],{"class":3819},[1291,16310,3738],{"class":3435},[1291,16312,3691],{"class":3435},[1291,16314,5438],{"class":3439},[1291,16316,3746],{"class":3435},[1291,16318,16319],{"class":3427,"line":3572},[1291,16320,16321],{"class":3435}," )\n",[73,16323,16324,16325,4390],{},"Map each data row into a structured document schema. 
See the full source code in ",[77,16326,7585],{"href":16327,"rel":16328},"https:\u002F\u002Fgithub.com\u002Fpathway-labs\u002Fchatgpt-api-python-sales\u002Fblob\u002Fmain\u002Fexamples\u002Fcsv\u002Fapi\u002Fapp.py",[81],[3418,16330,16332],{"className":3420,"code":16331,"language":3422,"meta":23,"style":23},"import pathway as pw\n\n...\nclass CsvDiscountsInputSchema(pw.Schema):\n    discount_until: str\n    country: str\n    city: str\n    state: str\n    postal_code: str\n    region: str\n    product_id: str\n    category: str\n    sub_category: str\n    brand: str\n    product_name: str\n    currency: str\n    actual_price: str\n    discount_price: str\n    discount_percentage: str\n    address: str\n...\n",[3061,16333,16334,16344,16348,16352,16371,16381,16390,16399,16408,16417,16426,16435,16444,16453,16462,16471,16480,16489,16498,16507,16516],{"__ignoreMap":23},[1291,16335,16336,16338,16340,16342],{"class":3427,"line":3428},[1291,16337,3476],{"class":3475},[1291,16339,3533],{"class":3431},[1291,16341,3536],{"class":3475},[1291,16343,3539],{"class":3431},[1291,16345,16346],{"class":3427,"line":24},[1291,16347,3526],{"emptyLinePlaceholder":35},[1291,16349,16350],{"class":3427,"line":675},[1291,16351,16257],{"class":3431},[1291,16353,16354,16357,16360,16362,16364,16366,16369],{"class":3427,"line":3542},[1291,16355,16356],{"class":7739},"class",[1291,16358,16359],{"class":6356}," CsvDiscountsInputSchema",[1291,16361,3816],{"class":3435},[1291,16363,3841],{"class":6356},[1291,16365,694],{"class":3435},[1291,16367,16368],{"class":6356},"Schema",[1291,16370,11948],{"class":3435},[1291,16372,16373,16376,16378],{"class":3427,"line":3547},[1291,16374,16375],{"class":3431},"    discount_until",[1291,16377,4390],{"class":3435},[1291,16379,16380],{"class":6356}," str\n",[1291,16382,16383,16386,16388],{"class":3427,"line":3572},[1291,16384,16385],{"class":3431},"    
country",[1291,16387,4390],{"class":3435},[1291,16389,16380],{"class":6356},[1291,16391,16392,16395,16397],{"class":3427,"line":3614},[1291,16393,16394],{"class":3431},"    city",[1291,16396,4390],{"class":3435},[1291,16398,16380],{"class":6356},[1291,16400,16401,16404,16406],{"class":3427,"line":3640},[1291,16402,16403],{"class":3431},"    state",[1291,16405,4390],{"class":3435},[1291,16407,16380],{"class":6356},[1291,16409,16410,16413,16415],{"class":3427,"line":3665},[1291,16411,16412],{"class":3431},"    postal_code",[1291,16414,4390],{"class":3435},[1291,16416,16380],{"class":6356},[1291,16418,16419,16422,16424],{"class":3427,"line":3670},[1291,16420,16421],{"class":3431},"    region",[1291,16423,4390],{"class":3435},[1291,16425,16380],{"class":6356},[1291,16427,16428,16431,16433],{"class":3427,"line":3677},[1291,16429,16430],{"class":3431},"    product_id",[1291,16432,4390],{"class":3435},[1291,16434,16380],{"class":6356},[1291,16436,16437,16440,16442],{"class":3427,"line":3877},[1291,16438,16439],{"class":3431},"    category",[1291,16441,4390],{"class":3435},[1291,16443,16380],{"class":6356},[1291,16445,16446,16449,16451],{"class":3427,"line":3916},[1291,16447,16448],{"class":3431},"    sub_category",[1291,16450,4390],{"class":3435},[1291,16452,16380],{"class":6356},[1291,16454,16455,16458,16460],{"class":3427,"line":4519},[1291,16456,16457],{"class":3431},"    brand",[1291,16459,4390],{"class":3435},[1291,16461,16380],{"class":6356},[1291,16463,16464,16467,16469],{"class":3427,"line":6038},[1291,16465,16466],{"class":3431},"    product_name",[1291,16468,4390],{"class":3435},[1291,16470,16380],{"class":6356},[1291,16472,16473,16476,16478],{"class":3427,"line":6043},[1291,16474,16475],{"class":3431},"    currency",[1291,16477,4390],{"class":3435},[1291,16479,16380],{"class":6356},[1291,16481,16482,16485,16487],{"class":3427,"line":6066},[1291,16483,16484],{"class":3431},"    
actual_price",[1291,16486,4390],{"class":3435},[1291,16488,16380],{"class":6356},[1291,16490,16491,16494,16496],{"class":3427,"line":6078},[1291,16492,16493],{"class":3431},"    discount_price",[1291,16495,4390],{"class":3435},[1291,16497,16380],{"class":6356},[1291,16499,16500,16503,16505],{"class":3427,"line":6089},[1291,16501,16502],{"class":3431},"    discount_percentage",[1291,16504,4390],{"class":3435},[1291,16506,16380],{"class":6356},[1291,16508,16509,16512,16514],{"class":3427,"line":6124},[1291,16510,16511],{"class":3431},"    address",[1291,16513,4390],{"class":3435},[1291,16515,16380],{"class":6356},[1291,16517,16518],{"class":3427,"line":6133},[1291,16519,16257],{"class":3431},[3189,16521,16523],{"id":16522},"step-3-data-preprocessing","Step 3: Data preprocessing",[73,16525,16526,16527,16531,16532,4390],{},"After our documents are loaded into a table, we transform each table row into a self-contained column called doc with column titles and values using the ",[77,16528,16530],{"href":16529},"\u002Fdevelopers\u002Fapi-docs\u002Fpathway#pathway.apply","Pathway Apply"," function. 
See the full source code in ",[77,16533,16536],{"href":16534,"rel":16535},"https:\u002F\u002Fgithub.com\u002Fpathway-labs\u002Fchatgpt-api-python-sales\u002Fblob\u002Fmain\u002Fcommon\u002Ftransform.py",[81],"transform.py",[3418,16538,16540],{"className":3420,"code":16539,"language":3422,"meta":23,"style":23},"...\ndef transform(sales_data):\n    return sales_data.select(\n        doc=pw.apply(concat_with_titles, **sales_data),\n    )\n",[3061,16541,16542,16546,16560,16574,16601],{"__ignoreMap":23},[1291,16543,16544],{"class":3427,"line":3428},[1291,16545,16257],{"class":3431},[1291,16547,16548,16550,16553,16555,16558],{"class":3427,"line":24},[1291,16549,11398],{"class":7739},[1291,16551,16552],{"class":3812}," transform",[1291,16554,3816],{"class":3435},[1291,16556,16557],{"class":3819},"sales_data",[1291,16559,11948],{"class":3435},[1291,16561,16562,16564,16567,16569,16572],{"class":3427,"line":675},[1291,16563,11771],{"class":3475},[1291,16565,16566],{"class":3431}," sales_data",[1291,16568,694],{"class":3435},[1291,16570,16571],{"class":3812},"select",[1291,16573,3874],{"class":3435},[1291,16575,16576,16579,16581,16583,16585,16588,16590,16593,16595,16597,16599],{"class":3427,"line":3542},[1291,16577,16578],{"class":3819},"        doc",[1291,16580,3738],{"class":3435},[1291,16582,3841],{"class":3812},[1291,16584,694],{"class":3435},[1291,16586,16587],{"class":3812},"apply",[1291,16589,3816],{"class":3435},[1291,16591,16592],{"class":3812},"concat_with_titles",[1291,16594,3566],{"class":3435},[1291,16596,11733],{"class":3435},[1291,16598,16557],{"class":3812},[1291,16600,4242],{"class":3435},[1291,16602,16603],{"class":3427,"line":3547},[1291,16604,11996],{"class":3435},[3189,16606,16608],{"id":16607},"step-4-data-embedding","Step 4: Data embedding",[73,16610,16611,16612,16615,16616,4390],{},"Each document is ",[77,16613,16004],{"href":15921,"rel":16614},[81]," with the OpenAI API and retrieves the embedded result. 
See the full source code in ",[77,16617,16620],{"href":16618,"rel":16619},"https:\u002F\u002Fgithub.com\u002Fpathway-labs\u002Fchatgpt-api-python-sales\u002Fblob\u002Fmain\u002Fcommon\u002Fembedder.py",[81],"embedder.py",[3418,16622,16624],{"className":3420,"code":16623,"language":3422,"meta":23,"style":23},"...\n\ndef contextful(context, data_to_embed):\n    return context + context.select(data=openai_embedder(data_to_embed))\n...\n",[3061,16625,16626,16630,16634,16652,16685],{"__ignoreMap":23},[1291,16627,16628],{"class":3427,"line":3428},[1291,16629,16257],{"class":3431},[1291,16631,16632],{"class":3427,"line":24},[1291,16633,3526],{"emptyLinePlaceholder":35},[1291,16635,16636,16638,16641,16643,16645,16647,16650],{"class":3427,"line":675},[1291,16637,11398],{"class":7739},[1291,16639,16640],{"class":3812}," contextful",[1291,16642,3816],{"class":3435},[1291,16644,6097],{"class":3819},[1291,16646,3566],{"class":3435},[1291,16648,16649],{"class":3819}," data_to_embed",[1291,16651,11948],{"class":3435},[1291,16653,16654,16656,16659,16662,16665,16667,16669,16671,16673,16675,16678,16680,16683],{"class":3427,"line":3542},[1291,16655,11771],{"class":3475},[1291,16657,16658],{"class":3431}," context ",[1291,16660,16661],{"class":3435},"+",[1291,16663,16664],{"class":3431}," context",[1291,16666,694],{"class":3435},[1291,16668,16571],{"class":3812},[1291,16670,3816],{"class":3435},[1291,16672,3935],{"class":3819},[1291,16674,3738],{"class":3435},[1291,16676,16677],{"class":3812},"openai_embedder",[1291,16679,3816],{"class":3435},[1291,16681,16682],{"class":3812},"data_to_embed",[1291,16684,7178],{"class":3435},[1291,16686,16687],{"class":3427,"line":3547},[1291,16688,16257],{"class":3431},[3189,16690,16692],{"id":16691},"step-5-data-indexing","Step 5: Data indexing",[73,16694,16695],{},"Then we construct an instant index on the generated embeddings:",[3418,16697,16699],{"className":3420,"code":16698,"language":3422,"meta":23,"style":23},"index = 
index_embeddings(embedded_data)\n",[3061,16700,16701],{"__ignoreMap":23},[1291,16702,16703,16705,16707,16710,16712,16715],{"class":3427,"line":3428},[1291,16704,10889],{"class":3431},[1291,16706,3738],{"class":3435},[1291,16708,16709],{"class":3812}," index_embeddings",[1291,16711,3816],{"class":3435},[1291,16713,16714],{"class":3812},"embedded_data",[1291,16716,3827],{"class":3435},[3189,16718,16720],{"id":16719},"step-6-user-query-processing-and-indexing","Step 6: User query processing and indexing",[73,16722,16723],{},"We create a REST endpoint, take a user query from the API request payload, and embed the user query also with the OpenAI API.",[3418,16725,16727],{"className":3420,"code":16726,"language":3422,"meta":23,"style":23},"...\nquery, response_writer = pw.io.http.rest_connector(\n        host=host,\n        port=port,\n        schema=QueryInputSchema,\n        autocommit_duration_ms=50,\n    )\n\nembedded_query = embeddings(context=query, data_to_embed=pw.this.query)\n...\n",[3061,16728,16729,16733,16762,16773,16784,16795,16807,16811,16815,16852],{"__ignoreMap":23},[1291,16730,16731],{"class":3427,"line":3428},[1291,16732,16257],{"class":3431},[1291,16734,16735,16737,16739,16742,16744,16746,16748,16750,16752,16755,16757,16760],{"class":3427,"line":24},[1291,16736,5803],{"class":3431},[1291,16738,3566],{"class":3435},[1291,16740,16741],{"class":3431}," response_writer ",[1291,16743,3738],{"class":3435},[1291,16745,4073],{"class":3431},[1291,16747,694],{"class":3435},[1291,16749,4078],{"class":3457},[1291,16751,694],{"class":3435},[1291,16753,16754],{"class":3457},"http",[1291,16756,694],{"class":3435},[1291,16758,16759],{"class":3812},"rest_connector",[1291,16761,3874],{"class":3435},[1291,16763,16764,16767,16769,16771],{"class":3427,"line":675},[1291,16765,16766],{"class":3819},"        
host",[1291,16768,3738],{"class":3435},[1291,16770,4764],{"class":3812},[1291,16772,4107],{"class":3435},[1291,16774,16775,16778,16780,16782],{"class":3427,"line":3542},[1291,16776,16777],{"class":3819},"        port",[1291,16779,3738],{"class":3435},[1291,16781,5597],{"class":3812},[1291,16783,4107],{"class":3435},[1291,16785,16786,16788,16790,16793],{"class":3427,"line":3547},[1291,16787,16297],{"class":3819},[1291,16789,3738],{"class":3435},[1291,16791,16792],{"class":3812},"QueryInputSchema",[1291,16794,4107],{"class":3435},[1291,16796,16797,16800,16802,16805],{"class":3427,"line":3572},[1291,16798,16799],{"class":3819},"        autocommit_duration_ms",[1291,16801,3738],{"class":3435},[1291,16803,16804],{"class":3451},"50",[1291,16806,4107],{"class":3435},[1291,16808,16809],{"class":3427,"line":3614},[1291,16810,11996],{"class":3435},[1291,16812,16813],{"class":3427,"line":3640},[1291,16814,3526],{"emptyLinePlaceholder":35},[1291,16816,16817,16820,16822,16825,16827,16829,16831,16833,16835,16837,16839,16841,16843,16846,16848,16850],{"class":3427,"line":3665},[1291,16818,16819],{"class":3431},"embedded_query ",[1291,16821,3738],{"class":3435},[1291,16823,16824],{"class":3812}," embeddings",[1291,16826,3816],{"class":3435},[1291,16828,6097],{"class":3819},[1291,16830,3738],{"class":3435},[1291,16832,5803],{"class":3812},[1291,16834,3566],{"class":3435},[1291,16836,16649],{"class":3819},[1291,16838,3738],{"class":3435},[1291,16840,3841],{"class":3812},[1291,16842,694],{"class":3435},[1291,16844,16845],{"class":3457},"this",[1291,16847,694],{"class":3435},[1291,16849,5803],{"class":3457},[1291,16851,3827],{"class":3435},[1291,16853,16854],{"class":3427,"line":3670},[1291,16855,16257],{"class":3431},[3189,16857,16859],{"id":16858},"step-7-similarity-search-and-prompt-engineering","Step 7: Similarity search and prompt engineering",[73,16861,16862,16863,16865],{},"We perform a similarity search by using index to identify the most relevant matches for the query 
embedding. Then we build a ",[169,16864,4423],{}," that merges the user's query with the fetched relevant data results and send the message to the ChatGPT Completion endpoint to produce a proper and detailed response.",[3418,16867,16869],{"className":3420,"code":16868,"language":3422,"meta":23,"style":23},"responses = prompt(index, embedded_query, pw.this.query)\n",[3061,16870,16871],{"__ignoreMap":23},[1291,16872,16873,16876,16878,16881,16883,16886,16888,16891,16893,16895,16897,16899,16901,16903],{"class":3427,"line":3428},[1291,16874,16875],{"class":3431},"responses ",[1291,16877,3738],{"class":3435},[1291,16879,16880],{"class":3812}," prompt",[1291,16882,3816],{"class":3435},[1291,16884,16885],{"class":3812},"index",[1291,16887,3566],{"class":3435},[1291,16889,16890],{"class":3812}," embedded_query",[1291,16892,3566],{"class":3435},[1291,16894,4073],{"class":3812},[1291,16896,694],{"class":3435},[1291,16898,16845],{"class":3457},[1291,16900,694],{"class":3435},[1291,16902,5803],{"class":3457},[1291,16904,3827],{"class":3435},[73,16906,16907,16908,694],{},"We followed the same in-context learning approach when we crafted the prompt and added internal knowledge to ChatGPT in the ",[77,16909,16912],{"href":16910,"rel":16911},"https:\u002F\u002Fgithub.com\u002Fpathway-labs\u002Fchatgpt-api-python-sales\u002Fblob\u002Fmain\u002Fcommon\u002Fprompt.py",[81],"prompt.py",[3418,16914,16916],{"className":3420,"code":16915,"language":3422,"meta":23,"style":23},"prompt = f\"Given the following discounts data: \\\\n {docs_str} \\\\nanswer this query: {query}\"\n",[3061,16917,16918],{"__ignoreMap":23},[1291,16919,16920,16922,16924,16926,16929,16932,16935,16937,16940,16942,16945,16948,16950,16952,16954],{"class":3427,"line":3428},[1291,16921,6046],{"class":3431},[1291,16923,3738],{"class":3435},[1291,16925,9484],{"class":7739},[1291,16927,16928],{"class":3439},"\"Given the following discounts data: ",[1291,16930,16931],{"class":3431},"\\\\",[1291,16933,16934],{"class":3439},"n 
",[1291,16936,8770],{"class":3451},[1291,16938,16939],{"class":3431},"docs_str",[1291,16941,9671],{"class":3451},[1291,16943,16944],{"class":3431}," \\\\",[1291,16946,16947],{"class":3439},"nanswer this query: ",[1291,16949,8770],{"class":3451},[1291,16951,5803],{"class":3431},[1291,16953,9671],{"class":3451},[1291,16955,3746],{"class":3439},[3189,16957,16959],{"id":16958},"step-8-return-the-response","Step 8: Return the response",[73,16961,16962],{},"The final step is just to return the API response to the user",[3418,16964,16966],{"className":3420,"code":16965,"language":3422,"meta":23,"style":23},"# Build prompt using indexed data\nresponses = prompt(index, embedded_query, pw.this.query)\n\n# Feed the prompt to ChatGPT and obtain the generated answer.\nresponse_writer(responses)\n",[3061,16967,16968,16973,17003,17007,17012],{"__ignoreMap":23},[1291,16969,16970],{"class":3427,"line":3428},[1291,16971,16972],{"class":3673},"# Build prompt using indexed data\n",[1291,16974,16975,16977,16979,16981,16983,16985,16987,16989,16991,16993,16995,16997,16999,17001],{"class":3427,"line":24},[1291,16976,16875],{"class":3431},[1291,16978,3738],{"class":3435},[1291,16980,16880],{"class":3812},[1291,16982,3816],{"class":3435},[1291,16984,16885],{"class":3812},[1291,16986,3566],{"class":3435},[1291,16988,16890],{"class":3812},[1291,16990,3566],{"class":3435},[1291,16992,4073],{"class":3812},[1291,16994,694],{"class":3435},[1291,16996,16845],{"class":3457},[1291,16998,694],{"class":3435},[1291,17000,5803],{"class":3457},[1291,17002,3827],{"class":3435},[1291,17004,17005],{"class":3427,"line":675},[1291,17006,3526],{"emptyLinePlaceholder":35},[1291,17008,17009],{"class":3427,"line":3542},[1291,17010,17011],{"class":3673},"# Feed the prompt to ChatGPT and obtain the generated 
answer.\n",[1291,17013,17014,17017,17019,17022],{"class":3427,"line":3547},[1291,17015,17016],{"class":3812},"response_writer",[1291,17018,3816],{"class":3435},[1291,17020,17021],{"class":3812},"responses",[1291,17023,3827],{"class":3435},[3189,17025,17027],{"id":17026},"step-9-put-everything-together","Step 9: Put everything together",[73,17029,17030,17031,17034],{},"Now if we put all the above steps together, you have an LLM-enabled Python API for custom discounts data ready to use, as you can see in the implementation in the ",[77,17032,7585],{"href":16327,"rel":17033},[81]," Python script.",[3418,17036,17038],{"className":3420,"code":17037,"language":3422,"meta":23,"style":23},"import pathway as pw\nfrom common.transform import transform\nfrom common.embedder import embeddings, index_embeddings\nfrom common.prompt import prompt\ndef run(host, port):\n    # Real-time data coming from external data sources such as csv file\n    sales_data = pw.io.csv.read(\n        \".\u002Fexamples\u002Fcsv\u002Fdata\",\n        schema=CsvDiscountsInputSchema,\n        mode=\"streaming\"\n    )\n\n    # Data source rows transformed into structured documents\n    documents = transform(sales_data)\n\n    # Compute embeddings for each document using the OpenAI Embeddings API\n    embedded_data = embeddings(context=documents, data_to_embed=documents.doc)\n\n    # Construct an index on the generated embeddings in real-time\n    index = index_embeddings(embedded_data)\n\n    # Given a user question as a query from your API\n    query, response_writer = pw.io.http.rest_connector(\n        host=host,\n        port=port,\n        schema=QueryInputSchema,\n        autocommit_duration_ms=50,\n    )\n\n    # Generate embeddings for the query from the OpenAI Embeddings API\n    embedded_query = embeddings(context=query, data_to_embed=pw.this.query)\n\n    # Build prompt using indexed data\n    responses = prompt(index, embedded_query, pw.this.query)\n\n    # Feed the prompt to ChatGPT and obtain the 
generated answer.\n    response_writer(responses)\n\n    # Run the pipeline\n    pw.run()\n",[3061,17039,17040,17050,17067,17086,17100,17117,17122,17145,17155,17165,17177,17181,17185,17190,17205,17209,17214,17247,17251,17256,17271,17275,17280,17307,17317,17327,17337,17347,17351,17355,17360,17395,17399,17404,17435,17439,17444,17455,17459,17464],{"__ignoreMap":23},[1291,17041,17042,17044,17046,17048],{"class":3427,"line":3428},[1291,17043,3476],{"class":3475},[1291,17045,3533],{"class":3431},[1291,17047,3536],{"class":3475},[1291,17049,3539],{"class":3431},[1291,17051,17052,17054,17057,17059,17062,17064],{"class":3427,"line":24},[1291,17053,3550],{"class":3475},[1291,17055,17056],{"class":3431}," common",[1291,17058,694],{"class":3435},[1291,17060,17061],{"class":3431},"transform ",[1291,17063,3476],{"class":3475},[1291,17065,17066],{"class":3431}," transform\n",[1291,17068,17069,17071,17073,17075,17077,17079,17081,17083],{"class":3427,"line":675},[1291,17070,3550],{"class":3475},[1291,17072,17056],{"class":3431},[1291,17074,694],{"class":3435},[1291,17076,4292],{"class":3431},[1291,17078,3476],{"class":3475},[1291,17080,16824],{"class":3431},[1291,17082,3566],{"class":3435},[1291,17084,17085],{"class":3431}," index_embeddings\n",[1291,17087,17088,17090,17092,17094,17096,17098],{"class":3427,"line":3542},[1291,17089,3550],{"class":3475},[1291,17091,17056],{"class":3431},[1291,17093,694],{"class":3435},[1291,17095,6046],{"class":3431},[1291,17097,3476],{"class":3475},[1291,17099,6130],{"class":3431},[1291,17101,17102,17104,17107,17109,17111,17113,17115],{"class":3427,"line":3547},[1291,17103,11398],{"class":7739},[1291,17105,17106],{"class":3812}," run",[1291,17108,3816],{"class":3435},[1291,17110,4764],{"class":3819},[1291,17112,3566],{"class":3435},[1291,17114,4774],{"class":3819},[1291,17116,11948],{"class":3435},[1291,17118,17119],{"class":3427,"line":3572},[1291,17120,17121],{"class":3673},"    # Real-time data coming from external data sources such as csv 
file\n",[1291,17123,17124,17127,17129,17131,17133,17135,17137,17139,17141,17143],{"class":3427,"line":3614},[1291,17125,17126],{"class":3431},"    sales_data ",[1291,17128,3738],{"class":3435},[1291,17130,4073],{"class":3431},[1291,17132,694],{"class":3435},[1291,17134,4078],{"class":3457},[1291,17136,694],{"class":3435},[1291,17138,16275],{"class":3457},[1291,17140,694],{"class":3435},[1291,17142,4088],{"class":3812},[1291,17144,3874],{"class":3435},[1291,17146,17147,17149,17151,17153],{"class":3427,"line":3640},[1291,17148,6590],{"class":3435},[1291,17150,16288],{"class":3439},[1291,17152,3691],{"class":3435},[1291,17154,4107],{"class":3435},[1291,17156,17157,17159,17161,17163],{"class":3427,"line":3665},[1291,17158,16297],{"class":3819},[1291,17160,3738],{"class":3435},[1291,17162,16302],{"class":3812},[1291,17164,4107],{"class":3435},[1291,17166,17167,17169,17171,17173,17175],{"class":3427,"line":3670},[1291,17168,6616],{"class":3819},[1291,17170,3738],{"class":3435},[1291,17172,3691],{"class":3435},[1291,17174,5438],{"class":3439},[1291,17176,3746],{"class":3435},[1291,17178,17179],{"class":3427,"line":3677},[1291,17180,11996],{"class":3435},[1291,17182,17183],{"class":3427,"line":3877},[1291,17184,3526],{"emptyLinePlaceholder":35},[1291,17186,17187],{"class":3427,"line":3916},[1291,17188,17189],{"class":3673},"    # Data source rows transformed into structured documents\n",[1291,17191,17192,17195,17197,17199,17201,17203],{"class":3427,"line":4519},[1291,17193,17194],{"class":3431},"    documents ",[1291,17196,3738],{"class":3435},[1291,17198,16552],{"class":3812},[1291,17200,3816],{"class":3435},[1291,17202,16557],{"class":3812},[1291,17204,3827],{"class":3435},[1291,17206,17207],{"class":3427,"line":6038},[1291,17208,3526],{"emptyLinePlaceholder":35},[1291,17210,17211],{"class":3427,"line":6043},[1291,17212,17213],{"class":3673},"    # Compute embeddings for each document using the OpenAI Embeddings 
API\n",[1291,17215,17216,17219,17221,17223,17225,17227,17229,17232,17234,17236,17238,17240,17242,17245],{"class":3427,"line":6066},[1291,17217,17218],{"class":3431},"    embedded_data ",[1291,17220,3738],{"class":3435},[1291,17222,16824],{"class":3812},[1291,17224,3816],{"class":3435},[1291,17226,6097],{"class":3819},[1291,17228,3738],{"class":3435},[1291,17230,17231],{"class":3812},"documents",[1291,17233,3566],{"class":3435},[1291,17235,16649],{"class":3819},[1291,17237,3738],{"class":3435},[1291,17239,17231],{"class":3812},[1291,17241,694],{"class":3435},[1291,17243,17244],{"class":3457},"doc",[1291,17246,3827],{"class":3435},[1291,17248,17249],{"class":3427,"line":6078},[1291,17250,3526],{"emptyLinePlaceholder":35},[1291,17252,17253],{"class":3427,"line":6089},[1291,17254,17255],{"class":3673},"    # Construct an index on the generated embeddings in real-time\n",[1291,17257,17258,17261,17263,17265,17267,17269],{"class":3427,"line":6124},[1291,17259,17260],{"class":3431},"    index ",[1291,17262,3738],{"class":3435},[1291,17264,16709],{"class":3812},[1291,17266,3816],{"class":3435},[1291,17268,16714],{"class":3812},[1291,17270,3827],{"class":3435},[1291,17272,17273],{"class":3427,"line":6133},[1291,17274,3526],{"emptyLinePlaceholder":35},[1291,17276,17277],{"class":3427,"line":6141},[1291,17278,17279],{"class":3673},"    # Given a user question as a query from your API\n",[1291,17281,17282,17285,17287,17289,17291,17293,17295,17297,17299,17301,17303,17305],{"class":3427,"line":6151},[1291,17283,17284],{"class":3431},"    
query",[1291,17286,3566],{"class":3435},[1291,17288,16741],{"class":3431},[1291,17290,3738],{"class":3435},[1291,17292,4073],{"class":3431},[1291,17294,694],{"class":3435},[1291,17296,4078],{"class":3457},[1291,17298,694],{"class":3435},[1291,17300,16754],{"class":3457},[1291,17302,694],{"class":3435},[1291,17304,16759],{"class":3812},[1291,17306,3874],{"class":3435},[1291,17308,17309,17311,17313,17315],{"class":3427,"line":6923},[1291,17310,16766],{"class":3819},[1291,17312,3738],{"class":3435},[1291,17314,4764],{"class":3812},[1291,17316,4107],{"class":3435},[1291,17318,17319,17321,17323,17325],{"class":3427,"line":6928},[1291,17320,16777],{"class":3819},[1291,17322,3738],{"class":3435},[1291,17324,5597],{"class":3812},[1291,17326,4107],{"class":3435},[1291,17328,17329,17331,17333,17335],{"class":3427,"line":6934},[1291,17330,16297],{"class":3819},[1291,17332,3738],{"class":3435},[1291,17334,16792],{"class":3812},[1291,17336,4107],{"class":3435},[1291,17338,17339,17341,17343,17345],{"class":3427,"line":6940},[1291,17340,16799],{"class":3819},[1291,17342,3738],{"class":3435},[1291,17344,16804],{"class":3451},[1291,17346,4107],{"class":3435},[1291,17348,17349],{"class":3427,"line":6952},[1291,17350,11996],{"class":3435},[1291,17352,17353],{"class":3427,"line":6984},[1291,17354,3526],{"emptyLinePlaceholder":35},[1291,17356,17357],{"class":3427,"line":7996},[1291,17358,17359],{"class":3673},"    # Generate embeddings for the query from the OpenAI Embeddings API\n",[1291,17361,17362,17365,17367,17369,17371,17373,17375,17377,17379,17381,17383,17385,17387,17389,17391,17393],{"class":3427,"line":8007},[1291,17363,17364],{"class":3431},"    embedded_query 
",[1291,17366,3738],{"class":3435},[1291,17368,16824],{"class":3812},[1291,17370,3816],{"class":3435},[1291,17372,6097],{"class":3819},[1291,17374,3738],{"class":3435},[1291,17376,5803],{"class":3812},[1291,17378,3566],{"class":3435},[1291,17380,16649],{"class":3819},[1291,17382,3738],{"class":3435},[1291,17384,3841],{"class":3812},[1291,17386,694],{"class":3435},[1291,17388,16845],{"class":3457},[1291,17390,694],{"class":3435},[1291,17392,5803],{"class":3457},[1291,17394,3827],{"class":3435},[1291,17396,17397],{"class":3427,"line":8018},[1291,17398,3526],{"emptyLinePlaceholder":35},[1291,17400,17401],{"class":3427,"line":8029},[1291,17402,17403],{"class":3673},"    # Build prompt using indexed data\n",[1291,17405,17406,17409,17411,17413,17415,17417,17419,17421,17423,17425,17427,17429,17431,17433],{"class":3427,"line":8040},[1291,17407,17408],{"class":3431},"    responses ",[1291,17410,3738],{"class":3435},[1291,17412,16880],{"class":3812},[1291,17414,3816],{"class":3435},[1291,17416,16885],{"class":3812},[1291,17418,3566],{"class":3435},[1291,17420,16890],{"class":3812},[1291,17422,3566],{"class":3435},[1291,17424,4073],{"class":3812},[1291,17426,694],{"class":3435},[1291,17428,16845],{"class":3457},[1291,17430,694],{"class":3435},[1291,17432,5803],{"class":3457},[1291,17434,3827],{"class":3435},[1291,17436,17437],{"class":3427,"line":8051},[1291,17438,3526],{"emptyLinePlaceholder":35},[1291,17440,17441],{"class":3427,"line":8057},[1291,17442,17443],{"class":3673},"    # Feed the prompt to ChatGPT and obtain the generated answer.\n",[1291,17445,17446,17449,17451,17453],{"class":3427,"line":8068},[1291,17447,17448],{"class":3812},"    response_writer",[1291,17450,3816],{"class":3435},[1291,17452,17021],{"class":3812},[1291,17454,3827],{"class":3435},[1291,17456,17457],{"class":3427,"line":8079},[1291,17458,3526],{"emptyLinePlaceholder":35},[1291,17460,17461],{"class":3427,"line":8090},[1291,17462,17463],{"class":3673},"    # Run the 
pipeline\n",[1291,17465,17466,17468,17470,17472],{"class":3427,"line":8101},[1291,17467,6571],{"class":3431},[1291,17469,694],{"class":3435},[1291,17471,11274],{"class":3812},[1291,17473,4871],{"class":3435},[3189,17475,17477],{"id":17476},"running-the-app","Running the app",[73,17479,17480,17481,17484,17485,17488],{},"Follow the instructions in the ",[77,17482,16059],{"href":16057,"rel":17483},[81]," file’s ",[169,17486,17487],{},"How to run the project"," section and you can start to ask questions about discounts, and the API will respond according to the discounts data source you have added.",[73,17490,17491,17492,4390],{},"Now send our API the same query we asked ChatGPT earlier, either with Postman or with the following curl command: curl --data '{\"query\": \"Can you find me discounts this week for Adidas men shoes?\"}' ",[77,17493,17494],{"href":17494,"rel":17495},"http:\u002F\u002Flocalhost:8080\u002F",[81],[3418,17497,17501],{"className":17498,"code":17499,"language":17500,"meta":23,"style":23},"language-shell shiki shiki-themes material-theme-palenight","curl --data '{\"query\": \"Can you find me discounts this week for Adidas men's shoes?\"}' \u003Chttp:\u002F\u002Flocalhost:8080\u002F>\n","shell",[3061,17502,17503],{"__ignoreMap":23},[1291,17504,17505,17508,17511,17513,17516,17518,17521,17524,17526],{"class":3427,"line":3428},[1291,17506,17507],{"class":6356},"curl",[1291,17509,17510],{"class":3439}," --data",[1291,17512,6415],{"class":3435},[1291,17514,17515],{"class":3439},"{\"query\": \"Can you find me discounts this week for Adidas men",[1291,17517,3436],{"class":3435},[1291,17519,17520],{"class":3439},"s",[1291,17522,17523],{"class":3439}," shoes?",[1291,17525,3691],{"class":3435},[1291,17527,17528],{"class":3439},"}' \u003Chttp:\u002F\u002Flocalhost:8080\u002F>\n",[73,17530,17531],{},"You will get the response with some discounts available based on your custom data (CSV file) as we 
expected.",[3418,17533,17535],{"className":17498,"code":17534,"language":17500,"meta":23,"style":23},"Based on the given data, there is one discount available this week for Adidas men's shoes:\n \nAvailable until 2023-10-28 in San Francisco, CA, USA. \n\nHere is the cleaned output:\nDiscount Until: 2023-10-28\nCountry: USA\\\\nCity: San Francisco\nState: CA\\\\nPostal Code: 87097\nRegion: West\nProduct ID: 9803\nCategory: Footwear\nSub-category: Men's Shoe\nBrand: Adidas\\\\n\nProduct Name: Running Shoes\nCurrency: USD\nActual Price: 130 \nDiscount Price: 76.30\nDiscount Percentage: 58%\nAddress: 321 Oak St\n",[3061,17536,17537,17588,17592,17597,17601,17606,17611,17616,17621,17626,17631,17636,17648,17660,17674,17682,17695,17705,17715],{"__ignoreMap":23},[1291,17538,17539,17542,17545,17548,17551,17554,17557,17560,17563,17566,17569,17572,17575,17577,17580,17583,17585],{"class":3427,"line":3428},[1291,17540,17541],{"class":6356},"Based",[1291,17543,17544],{"class":3439}," on",[1291,17546,17547],{"class":3439}," the",[1291,17549,17550],{"class":3439}," given",[1291,17552,17553],{"class":3439}," data,",[1291,17555,17556],{"class":3439}," there",[1291,17558,17559],{"class":3439}," is",[1291,17561,17562],{"class":3439}," one",[1291,17564,17565],{"class":3439}," discount",[1291,17567,17568],{"class":3439}," available",[1291,17570,17571],{"class":3439}," this",[1291,17573,17574],{"class":3439}," week",[1291,17576,9560],{"class":3439},[1291,17578,17579],{"class":3439}," Adidas",[1291,17581,17582],{"class":3439}," men",[1291,17584,3436],{"class":3435},[1291,17586,17587],{"class":3439},"s shoes:\n",[1291,17589,17590],{"class":3427,"line":24},[1291,17591,7743],{"class":3439},[1291,17593,17594],{"class":3427,"line":675},[1291,17595,17596],{"class":3439},"Available until 2023-10-28 in San Francisco, CA, USA. 
\n",[1291,17598,17599],{"class":3427,"line":3542},[1291,17600,3526],{"emptyLinePlaceholder":35},[1291,17602,17603],{"class":3427,"line":3547},[1291,17604,17605],{"class":3439},"Here is the cleaned output:\n",[1291,17607,17608],{"class":3427,"line":3572},[1291,17609,17610],{"class":3439},"Discount Until: 2023-10-28\n",[1291,17612,17613],{"class":3427,"line":3614},[1291,17614,17615],{"class":3439},"Country: USA\\\\nCity: San Francisco\n",[1291,17617,17618],{"class":3427,"line":3640},[1291,17619,17620],{"class":3439},"State: CA\\\\nPostal Code: 87097\n",[1291,17622,17623],{"class":3427,"line":3665},[1291,17624,17625],{"class":3439},"Region: West\n",[1291,17627,17628],{"class":3427,"line":3670},[1291,17629,17630],{"class":3439},"Product ID: 9803\n",[1291,17632,17633],{"class":3427,"line":3677},[1291,17634,17635],{"class":3439},"Category: Footwear\n",[1291,17637,17638,17641,17643,17645],{"class":3427,"line":3877},[1291,17639,17640],{"class":3439},"Sub-category: Men",[1291,17642,3436],{"class":3435},[1291,17644,17520],{"class":3439},[1291,17646,17647],{"class":3439}," Shoe\n",[1291,17649,17650,17653,17655,17657],{"class":3427,"line":3916},[1291,17651,17652],{"class":6356},"Brand:",[1291,17654,17579],{"class":3439},[1291,17656,16931],{"class":3431},[1291,17658,17659],{"class":3439},"n\n",[1291,17661,17662,17665,17668,17671],{"class":3427,"line":4519},[1291,17663,17664],{"class":6356},"Product",[1291,17666,17667],{"class":3439}," Name:",[1291,17669,17670],{"class":3439}," Running",[1291,17672,17673],{"class":3439}," Shoes\n",[1291,17675,17676,17679],{"class":3427,"line":6038},[1291,17677,17678],{"class":6356},"Currency:",[1291,17680,17681],{"class":3439}," USD\n",[1291,17683,17684,17687,17690,17693],{"class":3427,"line":6043},[1291,17685,17686],{"class":6356},"Actual",[1291,17688,17689],{"class":3439}," Price:",[1291,17691,17692],{"class":3451}," 
130",[1291,17694,7743],{"class":3431},[1291,17696,17697,17700,17702],{"class":3427,"line":6066},[1291,17698,17699],{"class":6356},"Discount",[1291,17701,17689],{"class":3439},[1291,17703,17704],{"class":3451}," 76.30\n",[1291,17706,17707,17709,17712],{"class":3427,"line":6078},[1291,17708,17699],{"class":6356},[1291,17710,17711],{"class":3439}," Percentage:",[1291,17713,17714],{"class":3439}," 58%\n",[1291,17716,17717,17720,17723,17726],{"class":3427,"line":6089},[1291,17718,17719],{"class":6356},"Address:",[1291,17721,17722],{"class":3451}," 321",[1291,17724,17725],{"class":3439}," Oak",[1291,17727,17728],{"class":3439}," St\n",[73,17730,17731,17732,17737],{},"In case you use it as a data source ",[77,17733,17736],{"href":17734,"rel":17735},"https:\u002F\u002Fwww.rainforestapi.com\u002Fdocs\u002Fproduct-data-api\u002Fparameters\u002Fdeals",[81],"Rainforest API"," provides real-time deals for Amazon products, you will get the following output for the same request:",[3098,17739],{"alt":17740,"src":17741},"Rainforest API test chatGPT","\u002Fassets\u002Fcontent\u002Fblog\u002Frainforest-api-test-chatgpt.gif",[140,17743,17745],{"id":17744},"further-improvements","Further Improvements",[73,17747,17748],{},"We've only discovered a few capabilities of the LLM App by adding domain-specific knowledge like discounts to ChatGPT. 
There are more things you can achieve:",[145,17750,17751,17754,17757,17760],{},[148,17752,17753],{},"Incorporate additional data from external APIs, along with various files (such as JSON Lines, PDF, Doc, HTML, or Text format), databases like PostgreSQL or MySQL, and stream data from platforms like Kafka, Redpanda, or Debezium.",[148,17755,17756],{},"Merge data from these sources instantly.",[148,17758,17759],{},"Maintain a data snapshot to observe variations in sales prices over time, as Pathway Live Data Framework provides a built-in feature to compute differences between two alterations.",[148,17761,17762],{},"Beyond making data accessible via API, the LLM App allows you to relay processed data to other downstream connectors, such as BI and analytics tools. For instance, set it up to receive alerts upon detecting price shifts.",[140,17764,17766],{"id":17765},"related-resources","Related resources",[145,17768,17769,17775],{},[148,17770,17771],{},[77,17772,17774],{"href":15745,"rel":17773},[81],"LLM App GitHub repository",[148,17776,17777],{},[77,17778,17780],{"href":17779},"\u002Fdevelopers\u002Fuser-guide\u002Fllm-xpack\u002Fllm-app","All You Need to Know to Build Your First LLM App",[3189,17782,17784],{"id":17783},"community","Community",[73,17786,17787,17788,17792],{},"Join the ",[77,17789,17791],{"href":3072,"rel":17790},[81],"Discord channel"," to see how the AI ChatBot assistant that we built using LLM App works.",[5019,17794,17795],{},"html pre.shiki code .s0W1g, html code.shiki .s0W1g{--shiki-default:#BABED8}html pre.shiki code .sAklC, html code.shiki .sAklC{--shiki-default:#89DDFF}html pre.shiki code .s-wAU, html code.shiki .s-wAU{--shiki-default:#F07178}html pre.shiki code .sdLwU, html code.shiki .sdLwU{--shiki-default:#82AAFF}html pre.shiki code .sfyAc, html code.shiki .sfyAc{--shiki-default:#C3E88D}html pre.shiki code .s7ZW3, html code.shiki .s7ZW3{--shiki-default:#BABED8;--shiki-default-font-style:italic}html .default .shiki span {color: 
var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html pre.shiki code .s6cf3, html code.shiki .s6cf3{--shiki-default:#89DDFF;--shiki-default-font-style:italic}html pre.shiki code .sJ14y, html code.shiki .sJ14y{--shiki-default:#C792EA}html pre.shiki code .s5Dmg, html code.shiki .s5Dmg{--shiki-default:#FFCB6B}html pre.shiki code .sx098, html code.shiki .sx098{--shiki-default:#F78C6C}html pre.shiki code .saEQR, html code.shiki .saEQR{--shiki-default:#676E95;--shiki-default-font-style:italic}",{"title":23,"searchDepth":24,"depth":24,"links":17797},[17798,17799,17800,17801,17802,17815,17816],{"id":15770,"depth":24,"text":15771},{"id":15791,"depth":24,"text":15792},{"id":15904,"depth":24,"text":15905},{"id":15954,"depth":24,"text":15955},{"id":16040,"depth":24,"text":16041,"children":17803},[17804,17805,17806,17807,17808,17809,17810,17811,17812,17813,17814],{"id":16063,"depth":675,"text":16064},{"id":16092,"depth":675,"text":16093},{"id":16225,"depth":675,"text":16226},{"id":16522,"depth":675,"text":16523},{"id":16607,"depth":675,"text":16608},{"id":16691,"depth":675,"text":16692},{"id":16719,"depth":675,"text":16720},{"id":16858,"depth":675,"text":16859},{"id":16958,"depth":675,"text":16959},{"id":17026,"depth":675,"text":17027},{"id":17476,"depth":675,"text":17477},{"id":17744,"depth":24,"text":17745},{"id":17765,"depth":24,"text":17766,"children":17817},[17818],{"id":17783,"depth":675,"text":17784},"Looking to make ChatGPT answer unfamiliar topics? 
Here is a step-by-step on how to achieve that.",{"aside":34,"layout":90,"thumbnail":17821,"tags":17823,"date":17825,"related":34,"hidden":35},{"src":17822,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fchatgpt-python-api-real-time-data.png",[90,17824,6268],"tutorial","2023-08-28","\u002Fframework\u002Fblog\u002Fchatgpt-python-api-real-time-data",{"title":15685,"description":17819},{"loc":17826},"framework\u002Fblog\u002F1010.chatgpt-python-api-real-time-data","AJRVGAAr0-1V39TfF3eu01xBIaYCtaPLH7abbYHZhts",{"id":17832,"title":17833,"author":17834,"body":17841,"description":19157,"extension":27,"meta":19158,"navigation":35,"path":19173,"seo":19174,"sitemap":19175,"stem":19176,"__hash__":19177},"content\u002Fframework\u002Fblog\u002F1020.pathway-laposte-microservices.md","How La Poste uses Pathway microservices to deliver high-quality ETAs",{"id":17835,"url":17836,"name":17837,"description":17838,"img":17839,"provider":11,"linkedin":17840},"sergey","sergey-kulik","Sergey Kulik","Lead Software Research Engineer and Solutions Architect","\u002Fassets\u002Fpictures\u002Fimage_kulik_pathway.jpg","https:\u002F\u002Fwww.linkedin.com\u002Fin\u002Fsergey-kulik-72506a33\u002F",{"type":13,"value":17842,"toc":19139},[17843,17846,17849,17855,17862,17865,17868,17872,17875,17878,17881,17885,17888,17902,17905,17908,17913,17917,17920,17923,17931,17934,17937,17941,17944,17948,17951,17954,18023,18034,18133,18136,18198,18201,18210,18213,18267,18270,18274,18277,18280,18283,18327,18330,18333,18485,18492,18519,18522,18525,18614,18618,18621,18624,18631,18656,18659,18704,18708,18715,18718,18799,18808,18811,18814,18925,18928,18931,18935,18938,18942,18945,18965,18969,18972,18992,18996,18999,19002,19012,19019,19083,19086,19089,19093,19096,19116,19118,19121,19124,19136],[68,17844,17833],{"id":17845},"how-la-poste-uses-pathway-microservices-to-deliver-high-quality-etas",[73,17847,17848],{},"La Poste is France’s national postal service provider, spanning mail, express shipping, 
banking, and even mobile offerings. Over the centuries, it has become a key player in both logistics and financial services. The network moves parcels through 17 industrial platforms, orchestrates 400-plus truck movements each day, and streams 16 million geolocation points a year.",[73,17850,17851,17854],{},[169,17852,17853],{},"Every second",", hundreds of IoT devices emit data points about the status of operations. Turning that data into reliable ETAs is key to improving efficiency, reducing delays, and avoiding congestion and incidents.",[73,17856,17857,17858,17861],{},"In early 2024, the La Poste Technological Innovation team adopted the Pathway Live Data Framework’s Python-native streaming engine and stitched pipeline-to-pipeline microservices together over Delta Lake: data preparation, prediction, ground-truth extraction, and evaluation. This created a ",[169,17859,17860],{},"LiveAI™ layer",", effectively a digital twin of the fleet, that turns raw GPS points into sub-second ETAs and real-time anomaly alerts in one continuous flow.",[73,17863,17864],{},"The migration has already cut the IoT platform’s total cost of ownership by 50 % and is projected to reduce fleet CAPEX by 16 %. “It’s a paradigm shift … the ROI is enormous,” says Jean-Paul Fabre, Head of Technological Innovation.",[73,17866,17867],{},"The sections ahead retrace that journey from the initial monolithic prototype to today’s modular pipeline, detailing the trade-offs and efficient scaling techniques.",[140,17869,17871],{"id":17870},"the-problem-estimating-arrival-times","The Problem: Estimating Arrival Times",[73,17873,17874],{},"Imagine a stream of IoT data generated by hundreds of transport units. This data is published on a Kafka topic and is ingested by the framework. For simplicity, each data point can be assumed to include a transport unit ID, latitude and longitude, speed, and a timestamp.",[73,17876,17877],{},"Now consider a second stream: ETA requests. 
Each request contains latitude and longitude, the ID of the assigned transport unit, and a timestamp indicating when the event occurred. This Kafka topic is partitioned so all requests associated with a given transport unit ID follow the intended arrivals sequence.",[73,17879,17880],{},"The goal of the pipeline is to produce estimated arrival times for the requested goods. The following section breaks down the components of this pipeline in more detail.",[140,17882,17884],{"id":17883},"first-solution-the-monolithic-pipeline","First Solution: The Monolithic Pipeline",[73,17886,17887],{},"While solving the problem using a single pipeline may seem straightforward, it would involve at least the following components:",[145,17889,17890,17896],{},[148,17891,17892,17895],{},[169,17893,17894],{},"Data cleanup and normalization",". For instance, a transport unit may enter a tunnel and report invalid GPS data. A common anomaly is the coordinate (0, 0), which corresponds to a location in the Atlantic Ocean approximately 600 kilometers off the coast of West Africa. Another frequently encountered issue involves duplicated events with identical timestamps that arrive in Kafka later. It is essential to filter out such incorrect or corrupted events to ensure the dataset remains clean and reliable.",[148,17897,17898,17901],{},[169,17899,17900],{},"Prediction job",". Once the dataset has been cleaned, predictions can be generated from the incoming data. Estimating ETAs is a complex task, requiring consideration of multiple factors such as road networks and conditions, time of day, and historical trends. However, simplified approaches are discussed since this tutorial focuses on microservice architecture.",[73,17903,17904],{},"These two tasks - data preparation and prediction - can be implemented together within a single pipeline. 
In such a setup, there is no immediate necessity to split them into separate services, which leads to what we refer to as a monolithic pipeline.",[73,17906,17907],{},"Once the core pipeline is in place, it's also important to include mechanisms for continuously evaluating its performance, monitoring the quality of predictions, and quickly detecting any degradation.",[73,17909,17910],{},[169,17911,17912],{},"Maintaining such a pipeline is challenging.",[140,17914,17916],{"id":17915},"using-microservices-for-a-more-production-grade-pipeline","Using Microservices for a More Production-Grade Pipeline",[73,17918,17919],{},"The previous section covered how to build a monolithic pipeline that provides basic ETAs. Now, it's time to focus on adding the necessary components for quality evaluation, model improvement, and alerting in case of a problem.",[73,17921,17922],{},"To enable this, a source of ground truths is required. These can be derived from the processed data stream: when an event indicates a transport unit has reached its designated target location, it can be interpreted as a completed delivery. The corresponding timestamp then serves as the actual arrival time - or ground truth - for evaluation. This brings us to a reason for introducing a split in the previously described pipeline: the data preparation process and the prediction logic are now decoupled. The cleaned and normalized data is reused, not only for prediction but also for ground truth detection. Separating these responsibilities makes it easier to manage, scale, and evolve each part independently.",[1141,17924],{"alt":17925,"className":17926,"height":17927,"quality":17928,"src":17929,"title":17925,"width":17930},"Resulting architecture",[133],612,"100","\u002Fassets\u002Fcontent\u002Fblog\u002Fla-poste-microservices\u002Fpathway-laposte-microservices.svg",1160,[73,17932,17933],{},"This mechanism supports multiple use cases handled by a separate Pathway instance. 
One key application is monitoring the quality of predictions. Several other processes also play a vital role in maintaining the overall pipeline. Although these are not discussed in detail here, they are briefly covered later in the \"Going further\" section.",[73,17935,17936],{},"This architecture comprises four pipelines: data preparation, prediction, ground truth calculation, and evaluation. The primary advantage of this microservice-style design lies not only in its simplicity but also in its flexibility—for example, experimenting with a new prediction model requires updating only one component of the system.",[140,17938,17940],{"id":17939},"key-components","Key components",[73,17942,17943],{},"This section describes the four main components of the pipeline. Since including every detail would be impractical, only the most relevant and illustrative pieces of code are provided. The aim is to show how a pipeline built with Pathway Live Data Framework can be decomposed into several smaller, more manageable pipelines.",[3189,17945,17947],{"id":17946},"data-acquisition-and-filtering-pipeline","Data Acquisition and Filtering Pipeline",[73,17949,17950],{},"Assume a Kafka topic contains a stream of data points emitted by various transport units. The goal of the first pipeline component is to read this data, clean it, and prepare it for the prediction stage.",[73,17952,17953],{},"First, a schema must be defined. As outlined in the previous section, each event contains the following fields: latitude, longitude, transport unit ID, speed, and a timestamp (expressed as a UNIX timestamp). 
The schema may be structured as follows:",[3418,17955,17957],{"className":3420,"code":17956,"language":3422,"meta":23,"style":23},"class InputEntrySchema(pw.Schema):\n    transport_unit_id: str\n    latitude: float\n    longitude: float\n    speed: float\n    timestamp: int\n",[3061,17958,17959,17976,17985,17995,18004,18013],{"__ignoreMap":23},[1291,17960,17961,17963,17966,17968,17970,17972,17974],{"class":3427,"line":3428},[1291,17962,16356],{"class":7739},[1291,17964,17965],{"class":6356}," InputEntrySchema",[1291,17967,3816],{"class":3435},[1291,17969,3841],{"class":6356},[1291,17971,694],{"class":3435},[1291,17973,16368],{"class":6356},[1291,17975,11948],{"class":3435},[1291,17977,17978,17981,17983],{"class":3427,"line":24},[1291,17979,17980],{"class":3431},"    transport_unit_id",[1291,17982,4390],{"class":3435},[1291,17984,16380],{"class":6356},[1291,17986,17987,17990,17992],{"class":3427,"line":675},[1291,17988,17989],{"class":3431},"    latitude",[1291,17991,4390],{"class":3435},[1291,17993,17994],{"class":6356}," float\n",[1291,17996,17997,18000,18002],{"class":3427,"line":3542},[1291,17998,17999],{"class":3431},"    longitude",[1291,18001,4390],{"class":3435},[1291,18003,17994],{"class":6356},[1291,18005,18006,18009,18011],{"class":3427,"line":3547},[1291,18007,18008],{"class":3431},"    speed",[1291,18010,4390],{"class":3435},[1291,18012,17994],{"class":6356},[1291,18014,18015,18018,18020],{"class":3427,"line":3572},[1291,18016,18017],{"class":3431},"    timestamp",[1291,18019,4390],{"class":3435},[1291,18021,18022],{"class":6356}," int\n",[73,18024,18025,18026,18029,18030,18033],{},"Since the data originates from a Kafka source, it can be read using ",[3061,18027,18028],{},"pw.io.kafka.simple_read"," for simplicity. The more robust ",[3061,18031,18032],{},"pw.io.kafka.read"," would typically be used in a production environment, allowing detailed configuration of rdkafka settings and other parameters. 
For demonstration purposes, the simpler version is sufficient:",[3418,18035,18037],{"className":3420,"code":18036,"language":3422,"meta":23,"style":23},"input_signals = pw.io.kafka.simple_read(\n    os.environ[\"KAFKA_IOT_SERVER\"],\n    os.environ[\"KAFKA_IOT_SIGNALS_TOPIC\"],\n    schema=InputEntrySchema,\n    format=\"json\",\n)\n",[3061,18038,18039,18064,18084,18103,18115,18129],{"__ignoreMap":23},[1291,18040,18041,18044,18046,18048,18050,18052,18054,18057,18059,18062],{"class":3427,"line":3428},[1291,18042,18043],{"class":3431},"input_signals ",[1291,18045,3738],{"class":3435},[1291,18047,4073],{"class":3431},[1291,18049,694],{"class":3435},[1291,18051,4078],{"class":3457},[1291,18053,694],{"class":3435},[1291,18055,18056],{"class":3457},"kafka",[1291,18058,694],{"class":3435},[1291,18060,18061],{"class":3812},"simple_read",[1291,18063,3874],{"class":3435},[1291,18065,18066,18068,18070,18072,18074,18076,18079,18081],{"class":3427,"line":24},[1291,18067,6477],{"class":3812},[1291,18069,694],{"class":3435},[1291,18071,3685],{"class":3457},[1291,18073,3688],{"class":3435},[1291,18075,3691],{"class":3435},[1291,18077,18078],{"class":3439},"KAFKA_IOT_SERVER",[1291,18080,3691],{"class":3435},[1291,18082,18083],{"class":3435},"],\n",[1291,18085,18086,18088,18090,18092,18094,18096,18099,18101],{"class":3427,"line":675},[1291,18087,6477],{"class":3812},[1291,18089,694],{"class":3435},[1291,18091,3685],{"class":3457},[1291,18093,3688],{"class":3435},[1291,18095,3691],{"class":3435},[1291,18097,18098],{"class":3439},"KAFKA_IOT_SIGNALS_TOPIC",[1291,18100,3691],{"class":3435},[1291,18102,18083],{"class":3435},[1291,18104,18105,18108,18110,18113],{"class":3427,"line":3542},[1291,18106,18107],{"class":3819},"    
schema",[1291,18109,3738],{"class":3435},[1291,18111,18112],{"class":3812},"InputEntrySchema",[1291,18114,4107],{"class":3435},[1291,18116,18117,18119,18121,18123,18125,18127],{"class":3427,"line":3547},[1291,18118,4112],{"class":3819},[1291,18120,3738],{"class":3435},[1291,18122,3691],{"class":3435},[1291,18124,8623],{"class":3439},[1291,18126,3691],{"class":3435},[1291,18128,4107],{"class":3435},[1291,18130,18131],{"class":3427,"line":3572},[1291,18132,3827],{"class":3435},[73,18134,18135],{},"Once the data is ingested, it must be cleaned. This involves filtering out erroneous records, such as those with invalid GPS coordinates. One common issue is the presence of events with coordinates (0, 0) - a location known as Null Island, which clearly indicates corrupted or incomplete data. A basic filtering step might look like this:",[3418,18137,18139],{"className":3420,"code":18138,"language":3422,"meta":23,"style":23},"filtered_signals = input_signals.filter(\n    pw.this.latitude != 0 or pw.this.longitude != 0\n)\n",[3061,18140,18141,18158,18194],{"__ignoreMap":23},[1291,18142,18143,18146,18148,18151,18153,18156],{"class":3427,"line":3428},[1291,18144,18145],{"class":3431},"filtered_signals ",[1291,18147,3738],{"class":3435},[1291,18149,18150],{"class":3431}," input_signals",[1291,18152,694],{"class":3435},[1291,18154,18155],{"class":3812},"filter",[1291,18157,3874],{"class":3435},[1291,18159,18160,18162,18164,18166,18168,18171,18174,18177,18179,18181,18183,18185,18187,18190,18192],{"class":3427,"line":24},[1291,18161,6571],{"class":3812},[1291,18163,694],{"class":3435},[1291,18165,16845],{"class":3457},[1291,18167,694],{"class":3435},[1291,18169,18170],{"class":3457},"latitude",[1291,18172,18173],{"class":3435}," !=",[1291,18175,18176],{"class":3451}," 
0",[1291,18178,9931],{"class":3475},[1291,18180,4073],{"class":3812},[1291,18182,694],{"class":3435},[1291,18184,16845],{"class":3457},[1291,18186,694],{"class":3435},[1291,18188,18189],{"class":3457},"longitude",[1291,18191,18173],{"class":3435},[1291,18193,7894],{"class":3451},[1291,18195,18196],{"class":3427,"line":675},[1291,18197,3827],{"class":3435},[73,18199,18200],{},"Additional filters can be applied in a similar chain to handle other anomalies, such as duplicate events or inconsistent timestamps.",[73,18202,18203,18204,18209],{},"After applying the necessary filters and transformations, the cleaned data can be persisted for downstream processing. ",[77,18205,18208],{"href":18206,"rel":18207},"https:\u002F\u002Fdelta.io\u002F",[81],"Delta Lake"," is a suitable choice for its simplicity and flexibility. It supports a variety of storage backends - including local file systems and S3 - and does not require any background services or binaries, making it a lightweight yet efficient option.",[73,18211,18212],{},"To write the cleaned data to Delta Lake:",[3418,18214,18216],{"className":3420,"code":18215,"language":3422,"meta":23,"style":23},"pw.io.deltalake.write(\n    table,\n    os.environ[\"PREPARED_DATA_DELTA_TABLE\"],\n)\n",[3061,18217,18218,18237,18244,18263],{"__ignoreMap":23},[1291,18219,18220,18222,18224,18226,18228,18231,18233,18235],{"class":3427,"line":3428},[1291,18221,3841],{"class":3431},[1291,18223,694],{"class":3435},[1291,18225,4078],{"class":3457},[1291,18227,694],{"class":3435},[1291,18229,18230],{"class":3457},"deltalake",[1291,18232,694],{"class":3435},[1291,18234,9700],{"class":3812},[1291,18236,3874],{"class":3435},[1291,18238,18239,18242],{"class":3427,"line":24},[1291,18240,18241],{"class":3812},"    
table",[1291,18243,4107],{"class":3435},[1291,18245,18246,18248,18250,18252,18254,18256,18259,18261],{"class":3427,"line":675},[1291,18247,6477],{"class":3812},[1291,18249,694],{"class":3435},[1291,18251,3685],{"class":3457},[1291,18253,3688],{"class":3435},[1291,18255,3691],{"class":3435},[1291,18257,18258],{"class":3439},"PREPARED_DATA_DELTA_TABLE",[1291,18260,3691],{"class":3435},[1291,18262,18083],{"class":3435},[1291,18264,18265],{"class":3427,"line":3542},[1291,18266,3827],{"class":3435},[73,18268,18269],{},"At this point, the data is cleaned and ready for use by other components in the pipeline.",[3189,18271,18273],{"id":18272},"prediction-pipeline","Prediction Pipeline",[73,18275,18276],{},"With the prepared table now containing clean data, free of zero GPS coordinates, delayed events, duplicates, and other anomalies, a separate process can be implemented to perform predictions. This prediction process runs independently, parallel to the initial data preparation pipeline.",[73,18278,18279],{},"To begin, the table must be opened. The Pathway Live Data Framework provides a simplified method for using Delta Lake as the output connector. Delta Lake allows storing schema as part of the table's metadata. 
The framework handles this end-to-end: when writing, it saves the schema automatically; when reading, it retrieves and applies it without requiring manual intervention.",[73,18281,18282],{},"Given this setup, the cleaned data table produced by the first process can be loaded as follows:",[3418,18284,18286],{"className":3420,"code":18285,"language":3422,"meta":23,"style":23},"signals = pw.io.deltalake.read(os.environ[\"PREPARED_DATA_DELTA_TABLE\"])\n",[3061,18287,18288],{"__ignoreMap":23},[1291,18289,18290,18293,18295,18297,18299,18301,18303,18305,18307,18309,18311,18313,18315,18317,18319,18321,18323,18325],{"class":3427,"line":3428},[1291,18291,18292],{"class":3431},"signals ",[1291,18294,3738],{"class":3435},[1291,18296,4073],{"class":3431},[1291,18298,694],{"class":3435},[1291,18300,4078],{"class":3457},[1291,18302,694],{"class":3435},[1291,18304,18230],{"class":3457},[1291,18306,694],{"class":3435},[1291,18308,4088],{"class":3812},[1291,18310,3816],{"class":3435},[1291,18312,3680],{"class":3812},[1291,18314,694],{"class":3435},[1291,18316,3685],{"class":3457},[1291,18318,3688],{"class":3435},[1291,18320,3691],{"class":3435},[1291,18322,18258],{"class":3439},[1291,18324,3691],{"class":3435},[1291,18326,9572],{"class":3435},[73,18328,18329],{},"The table containing ETA requests can be read separately. Depending on the implementation, this data may be stored either in Delta Lake or in Kafka. 
The exact choice is typically less critical, as experience shows that GPS tracking data tends to be more problematic than the request stream.",[73,18331,18332],{},"If the requests are read from Kafka, the process starts by defining the schema, followed by reading the stream using a method such as:",[3418,18334,18336],{"className":3420,"code":18335,"language":3422,"meta":23,"style":23},"class RequestsSchema(pw.Schema):\n    request_id: str\n    transport_unit_id: str\n    latitude: float\n    longitude: float\n\n\nrequests = pw.io.kafka.simple_read(\n    os.environ[\"KAFKA_IOT_SERVER\"],\n    os.environ[\"KAFKA_REQUESTS_TOPIC\"],\n    schema=RequestSchema,\n    format=\"json\",\n)\n",[3061,18337,18338,18355,18364,18372,18380,18388,18392,18396,18419,18437,18456,18467,18481],{"__ignoreMap":23},[1291,18339,18340,18342,18345,18347,18349,18351,18353],{"class":3427,"line":3428},[1291,18341,16356],{"class":7739},[1291,18343,18344],{"class":6356}," RequestsSchema",[1291,18346,3816],{"class":3435},[1291,18348,3841],{"class":6356},[1291,18350,694],{"class":3435},[1291,18352,16368],{"class":6356},[1291,18354,11948],{"class":3435},[1291,18356,18357,18360,18362],{"class":3427,"line":24},[1291,18358,18359],{"class":3431},"    
request_id",[1291,18361,4390],{"class":3435},[1291,18363,16380],{"class":6356},[1291,18365,18366,18368,18370],{"class":3427,"line":675},[1291,18367,17980],{"class":3431},[1291,18369,4390],{"class":3435},[1291,18371,16380],{"class":6356},[1291,18373,18374,18376,18378],{"class":3427,"line":3542},[1291,18375,17989],{"class":3431},[1291,18377,4390],{"class":3435},[1291,18379,17994],{"class":6356},[1291,18381,18382,18384,18386],{"class":3427,"line":3547},[1291,18383,17999],{"class":3431},[1291,18385,4390],{"class":3435},[1291,18387,17994],{"class":6356},[1291,18389,18390],{"class":3427,"line":3572},[1291,18391,3526],{"emptyLinePlaceholder":35},[1291,18393,18394],{"class":3427,"line":3614},[1291,18395,3526],{"emptyLinePlaceholder":35},[1291,18397,18398,18401,18403,18405,18407,18409,18411,18413,18415,18417],{"class":3427,"line":3640},[1291,18399,18400],{"class":3431},"requests ",[1291,18402,3738],{"class":3435},[1291,18404,4073],{"class":3431},[1291,18406,694],{"class":3435},[1291,18408,4078],{"class":3457},[1291,18410,694],{"class":3435},[1291,18412,18056],{"class":3457},[1291,18414,694],{"class":3435},[1291,18416,18061],{"class":3812},[1291,18418,3874],{"class":3435},[1291,18420,18421,18423,18425,18427,18429,18431,18433,18435],{"class":3427,"line":3665},[1291,18422,6477],{"class":3812},[1291,18424,694],{"class":3435},[1291,18426,3685],{"class":3457},[1291,18428,3688],{"class":3435},[1291,18430,3691],{"class":3435},[1291,18432,18078],{"class":3439},[1291,18434,3691],{"class":3435},[1291,18436,18083],{"class":3435},[1291,18438,18439,18441,18443,18445,18447,18449,18452,18454],{"class":3427,"line":3670},[1291,18440,6477],{"class":3812},[1291,18442,694],{"class":3435},[1291,18444,3685],{"class":3457},[1291,18446,3688],{"class":3435},[1291,18448,3691],{"class":3435},[1291,18450,18451],{"class":3439},"KAFKA_REQUESTS_TOPIC",[1291,18453,3691],{"class":3435},[1291,18455,18083],{"class":3435},[1291,18457,18458,18460,18462,18465],{"class":3427,"line":3677},[1291,18459,18107],{"class
":3819},[1291,18461,3738],{"class":3435},[1291,18463,18464],{"class":3812},"RequestSchema",[1291,18466,4107],{"class":3435},[1291,18468,18469,18471,18473,18475,18477,18479],{"class":3427,"line":3877},[1291,18470,4112],{"class":3819},[1291,18472,3738],{"class":3435},[1291,18474,3691],{"class":3435},[1291,18476,8623],{"class":3439},[1291,18478,3691],{"class":3435},[1291,18480,4107],{"class":3435},[1291,18482,18483],{"class":3427,"line":3916},[1291,18484,3827],{"class":3435},[73,18486,18487,18488,18491],{},"Once the ETA requests are read, the prediction logic can be implemented - for example, by estimating the ETA based on the current location of each transport unit. More advanced approaches are also possible; for instance, a custom reducer can accumulate requests per transport unit and predict the ETA based on the sequence of upcoming target points. This logic can be encapsulated within a method named ",[3061,18489,18490],{},"build_predictions",", which takes the table of prepared, cleaned signals and the table of ETA requests as input and returns a table containing the computed predictions.",[3418,18493,18495],{"className":3420,"code":18494,"language":3422,"meta":23,"style":23},"predictions = build_predictions(signals, requests)\n",[3061,18496,18497],{"__ignoreMap":23},[1291,18498,18499,18502,18504,18507,18509,18512,18514,18517],{"class":3427,"line":3428},[1291,18500,18501],{"class":3431},"predictions ",[1291,18503,3738],{"class":3435},[1291,18505,18506],{"class":3812}," build_predictions",[1291,18508,3816],{"class":3435},[1291,18510,18511],{"class":3812},"signals",[1291,18513,3566],{"class":3435},[1291,18515,18516],{"class":3812}," requests",[1291,18518,3827],{"class":3435},[73,18520,18521],{},"After the prediction logic is implemented, and the resulting predictions are stored in a predictions table that updates as new data arrives, these predictions can be persisted in a separate Delta Lake table for downstream evaluation. 
Additionally, the predictions should be published back to the requester's Kafka topic to ensure real-time arrival tracking.",[73,18523,18524],{},"This results in code similar to the following:",[3418,18526,18528],{"className":3420,"code":18527,"language":3422,"meta":23,"style":23},"pw.io.kafka.write(predictions, rdkafka_settings(), os.environ[\"KAFKA_PREDICTIONS_TOPIC\"])\npw.io.deltalake.write(predictions, os.environ[\"PREDICTIONS_DELTA_TABLE\"])\n",[3061,18529,18530,18575],{"__ignoreMap":23},[1291,18531,18532,18534,18536,18538,18540,18542,18544,18546,18548,18551,18553,18556,18558,18560,18562,18564,18566,18568,18571,18573],{"class":3427,"line":3428},[1291,18533,3841],{"class":3431},[1291,18535,694],{"class":3435},[1291,18537,4078],{"class":3457},[1291,18539,694],{"class":3435},[1291,18541,18056],{"class":3457},[1291,18543,694],{"class":3435},[1291,18545,9700],{"class":3812},[1291,18547,3816],{"class":3435},[1291,18549,18550],{"class":3812},"predictions",[1291,18552,3566],{"class":3435},[1291,18554,18555],{"class":3812}," 
rdkafka_settings",[1291,18557,10858],{"class":3435},[1291,18559,5236],{"class":3812},[1291,18561,694],{"class":3435},[1291,18563,3685],{"class":3457},[1291,18565,3688],{"class":3435},[1291,18567,3691],{"class":3435},[1291,18569,18570],{"class":3439},"KAFKA_PREDICTIONS_TOPIC",[1291,18572,3691],{"class":3435},[1291,18574,9572],{"class":3435},[1291,18576,18577,18579,18581,18583,18585,18587,18589,18591,18593,18595,18597,18599,18601,18603,18605,18607,18610,18612],{"class":3427,"line":24},[1291,18578,3841],{"class":3431},[1291,18580,694],{"class":3435},[1291,18582,4078],{"class":3457},[1291,18584,694],{"class":3435},[1291,18586,18230],{"class":3457},[1291,18588,694],{"class":3435},[1291,18590,9700],{"class":3812},[1291,18592,3816],{"class":3435},[1291,18594,18550],{"class":3812},[1291,18596,3566],{"class":3435},[1291,18598,5236],{"class":3812},[1291,18600,694],{"class":3435},[1291,18602,3685],{"class":3457},[1291,18604,3688],{"class":3435},[1291,18606,3691],{"class":3435},[1291,18608,18609],{"class":3439},"PREDICTIONS_DELTA_TABLE",[1291,18611,3691],{"class":3435},[1291,18613,9572],{"class":3435},[3189,18615,18617],{"id":18616},"ground-truth-computation-pipeline","Ground Truth Computation Pipeline",[73,18619,18620],{},"The ground truth computation pipeline is essential for evaluating the quality of predictions. Once the predicted values and the actual outcomes are available, they can be compared to assess accuracy. This is especially important for monitoring: if performance metrics deteriorate following a deployment, it becomes clear that something may have gone wrong. Without this feedback loop, the system would be operating blindly, making ground truth computation useful and necessary.",[73,18622,18623],{},"This component can be implemented similarly to the others. A Pathway process is created that reads both the IoT signals and the ETA requests. A custom reducer can then be used to monitor each transport unit's expected arrivals. 
When a transport unit's location matches the destination of an order and there are no other pending deliveries, the order can be marked as completed, providing a ground-truth timestamp.",[73,18625,18626,18627,18630],{},"This logic must run in a separate process. While prediction is done in real time, ground truth events occur at an unknown point in the future, possibly 30 minutes or even several hours after the prediction is made. By decoupling the ground truth computation from the prediction process, the system remains responsive and scalable, without blocking or delaying predictions. As in the previous section, a table of prepared signals and a table of ETA requests are required. With these inputs, the ground truth computation logic can be implemented within a ",[3061,18628,18629],{},"build_ground_truths"," method.",[3418,18632,18634],{"className":3420,"code":18633,"language":3422,"meta":23,"style":23},"ground_truths = build_ground_truths(signals, requests)\n",[3061,18635,18636],{"__ignoreMap":23},[1291,18637,18638,18641,18643,18646,18648,18650,18652,18654],{"class":3427,"line":3428},[1291,18639,18640],{"class":3431},"ground_truths ",[1291,18642,3738],{"class":3435},[1291,18644,18645],{"class":3812}," build_ground_truths",[1291,18647,3816],{"class":3435},[1291,18649,18511],{"class":3812},[1291,18651,3566],{"class":3435},[1291,18653,18516],{"class":3812},[1291,18655,3827],{"class":3435},[73,18657,18658],{},"Once computed, the ground truths can be written to a Delta Lake table for further evaluation:",[3418,18660,18662],{"className":3420,"code":18661,"language":3422,"meta":23,"style":23},"pw.io.deltalake.write(ground_truths, 
os.environ[\"GROUND_TRUTHS_DELTA_TABLE\"])\n",[3061,18663,18664],{"__ignoreMap":23},[1291,18665,18666,18668,18670,18672,18674,18676,18678,18680,18682,18685,18687,18689,18691,18693,18695,18697,18700,18702],{"class":3427,"line":3428},[1291,18667,3841],{"class":3431},[1291,18669,694],{"class":3435},[1291,18671,4078],{"class":3457},[1291,18673,694],{"class":3435},[1291,18675,18230],{"class":3457},[1291,18677,694],{"class":3435},[1291,18679,9700],{"class":3812},[1291,18681,3816],{"class":3435},[1291,18683,18684],{"class":3812},"ground_truths",[1291,18686,3566],{"class":3435},[1291,18688,5236],{"class":3812},[1291,18690,694],{"class":3435},[1291,18692,3685],{"class":3457},[1291,18694,3688],{"class":3435},[1291,18696,3691],{"class":3435},[1291,18698,18699],{"class":3439},"GROUND_TRUTHS_DELTA_TABLE",[1291,18701,3691],{"class":3435},[1291,18703,9572],{"class":3435},[3189,18705,18707],{"id":18706},"evaluation-of-the-predictions-pipeline","Evaluation of the Predictions Pipeline",[73,18709,18710,18711,18714],{},"At this stage, both the prediction and ground truth computation processes are in place. The next step is to read data from these two sources and join them using the ",[3061,18712,18713],{},"order_id"," field. Once joined, each pair of records contains both the predicted and the actual arrival time. This information allows the deviation between the two to be calculated and written to a new table.",[73,18716,18717],{},"It can be assumed that the schema for prediction and ground truth entries is the same. 
In this case, they can be read using the Delta Lake connector as follows:",[3418,18719,18721],{"className":3420,"code":18720,"language":3422,"meta":23,"style":23},"predictions = pw.io.deltalake.read(os.environ[\"PREDICTIONS_DELTA_TABLE\"])\nground_truths = pw.io.deltalake.read(os.environ[\"GROUND_TRUTHS_DELTA_TABLE\"])\n",[3061,18722,18723,18761],{"__ignoreMap":23},[1291,18724,18725,18727,18729,18731,18733,18735,18737,18739,18741,18743,18745,18747,18749,18751,18753,18755,18757,18759],{"class":3427,"line":3428},[1291,18726,18501],{"class":3431},[1291,18728,3738],{"class":3435},[1291,18730,4073],{"class":3431},[1291,18732,694],{"class":3435},[1291,18734,4078],{"class":3457},[1291,18736,694],{"class":3435},[1291,18738,18230],{"class":3457},[1291,18740,694],{"class":3435},[1291,18742,4088],{"class":3812},[1291,18744,3816],{"class":3435},[1291,18746,3680],{"class":3812},[1291,18748,694],{"class":3435},[1291,18750,3685],{"class":3457},[1291,18752,3688],{"class":3435},[1291,18754,3691],{"class":3435},[1291,18756,18609],{"class":3439},[1291,18758,3691],{"class":3435},[1291,18760,9572],{"class":3435},[1291,18762,18763,18765,18767,18769,18771,18773,18775,18777,18779,18781,18783,18785,18787,18789,18791,18793,18795,18797],{"class":3427,"line":24},[1291,18764,18640],{"class":3431},[1291,18766,3738],{"class":3435},[1291,18768,4073],{"class":3431},[1291,18770,694],{"class":3435},[1291,18772,4078],{"class":3457},[1291,18774,694],{"class":3435},[1291,18776,18230],{"class":3457},[1291,18778,694],{"class":3435},[1291,18780,4088],{"class":3812},[1291,18782,3816],{"class":3435},[1291,18784,3680],{"class":3812},[1291,18786,694],{"class":3435},[1291,18788,3685],{"class":3457},[1291,18790,3688],{"class":3435},[1291,18792,3691],{"class":3435},[1291,18794,18699],{"class":3439},[1291,18796,3691],{"class":3435},[1291,18798,9572],{"class":3435},[73,18800,18801,18802,18807],{},"One important consideration is that, for a single ETA request, multiple predictions may be generated over time as new 
data becomes available. This means a strategy is needed to aggregate these predictions when evaluating accuracy. A basic approach would be to compute the ",[77,18803,18806],{"href":18804,"rel":18805},"https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FMean_absolute_error",[81],"Mean Absolute Error (MAE)"," across all records. However, MAE alone does not account for how prediction accuracy varies with the time or distance remaining before arrival. A more descriptive evaluation might involve calculating MAE within specific buckets, such as predictions made less than 5 minutes before arrival, between 5 and 30 minutes, and beyond.",[73,18809,18810],{},"In practice, many different metrics could be useful depending on the context. A flexible solution was adopted in our La Poste use case: the Pathway Live Data Framework ETL task is responsible only for joining the predictions and ground truths and calculating the raw error for each individual prediction. These results are then stored in a Postgres database, and the statistical analysis and visualization are handled separately in a BI tool. This separation of concerns provides greater flexibility in monitoring and evaluation.",[73,18812,18813],{},"The raw evaluation data can be exported in various formats. For example, it can be written to a CSV file for offline analysis using tools like Pandas or Excel. 
It can also use a Postgres database, as pointed out above:",[3418,18815,18817],{"className":3420,"code":18816,"language":3422,"meta":23,"style":23},"evaluation = evaluate_predictions(predictions, ground_truths)\npw.io.csv.write(evaluation, os.environ[\"EVALUATION_RESULTS_TABLE\"])\npw.io.postgres.write(evaluation, get_postgres_settings(), os.environ[\"EVALUATION_POSTGRES_TABLE\"])\n",[3061,18818,18819,18840,18880],{"__ignoreMap":23},[1291,18820,18821,18824,18826,18829,18831,18833,18835,18838],{"class":3427,"line":3428},[1291,18822,18823],{"class":3431},"evaluation ",[1291,18825,3738],{"class":3435},[1291,18827,18828],{"class":3812}," evaluate_predictions",[1291,18830,3816],{"class":3435},[1291,18832,18550],{"class":3812},[1291,18834,3566],{"class":3435},[1291,18836,18837],{"class":3812}," ground_truths",[1291,18839,3827],{"class":3435},[1291,18841,18842,18844,18846,18848,18850,18852,18854,18856,18858,18861,18863,18865,18867,18869,18871,18873,18876,18878],{"class":3427,"line":24},[1291,18843,3841],{"class":3431},[1291,18845,694],{"class":3435},[1291,18847,4078],{"class":3457},[1291,18849,694],{"class":3435},[1291,18851,16275],{"class":3457},[1291,18853,694],{"class":3435},[1291,18855,9700],{"class":3812},[1291,18857,3816],{"class":3435},[1291,18859,18860],{"class":3812},"evaluation",[1291,18862,3566],{"class":3435},[1291,18864,5236],{"class":3812},[1291,18866,694],{"class":3435},[1291,18868,3685],{"class":3457},[1291,18870,3688],{"class":3435},[1291,18872,3691],{"class":3435},[1291,18874,18875],{"class":3439},"EVALUATION_RESULTS_TABLE",[1291,18877,3691],{"class":3435},[1291,18879,9572],{"class":3435},[1291,18881,18882,18884,18886,18888,18890,18893,18895,18897,18899,18901,18903,18906,18908,18910,18912,18914,18916,18918,18921,18923],{"class":3427,"line":675},[1291,18883,3841],{"class":3431},[1291,18885,694],{"class":3435},[1291,18887,4078],{"class":3457},[1291,18889,694],{"class":3435},[1291,18891,18892],{"class":3457},"postgres",[1291,18894,694],{"class":3435},[1291,1
8896,9700],{"class":3812},[1291,18898,3816],{"class":3435},[1291,18900,18860],{"class":3812},[1291,18902,3566],{"class":3435},[1291,18904,18905],{"class":3812}," get_postgres_settings",[1291,18907,10858],{"class":3435},[1291,18909,5236],{"class":3812},[1291,18911,694],{"class":3435},[1291,18913,3685],{"class":3457},[1291,18915,3688],{"class":3435},[1291,18917,3691],{"class":3435},[1291,18919,18920],{"class":3439},"EVALUATION_POSTGRES_TABLE",[1291,18922,3691],{"class":3435},[1291,18924,9572],{"class":3435},[73,18926,18927],{},"Alternatively, a custom output connector can stream the data directly into Grafana or another monitoring tool for real-time visualization.",[73,18929,18930],{},"The core pipeline is complete. In the next section, the advantages and trade-offs of this modular architecture will be discussed, including when such decomposition is most beneficial and how to address scaling challenges effectively.",[140,18932,18934],{"id":18933},"observations","Observations",[73,18936,18937],{},"Whether to decompose a system into multiple pipelines or keep it as a single unit is an old question. It closely resembles the debate between monolithic and microservices architectures. In this section, we will discuss the pros and cons of splitting the pipeline and insights from our experience implementing this approach for La Poste's ETA prediction task, where such decomposition proved effective.",[3189,18939,18941],{"id":18940},"microservices-benefits","Microservices Benefits",[73,18943,18944],{},"By implementing this decomposition into different microservices, several clear advantages were observed:",[145,18946,18947,18953,18959],{},[148,18948,18949,18952],{},[169,18950,18951],{},"Improved scalability",". The Pathway Live Data Framework supports configurable parallelism, allowing each pipeline to scale independently. 
This means that only the components under load, such as the prediction pipeline, need additional resources without affecting others, like alerting or evaluation.",[148,18954,18955,18958],{},[169,18956,18957],{},"Access to intermediate data",". With clearly separated stages, inspecting and analyzing intermediate outputs becomes easier. This is particularly helpful for tracing the source of bad data and debugging complex behaviors.",[148,18960,18961,18964],{},[169,18962,18963],{},"Enhanced fault tolerance",". Isolating each pipeline ensures that failures in one component, such as a runtime exception in the evaluation pipeline, do not cascade or disrupt other processes, like predictions. This separation leads to a more robust and resilient system overall.",[3189,18966,18968],{"id":18967},"new-challenges","New Challenges",[73,18970,18971],{},"While the benefits of using microservices are significant, several challenges do arise and need to be addressed:",[145,18973,18974,18980,18986],{},[148,18975,18976,18979],{},[169,18977,18978],{},"Increased maintenance overhead",". Changing the schema of an intermediate table produced by one pipeline often requires synchronized updates in downstream pipelines, and sometimes even adjustments to historical data.",[148,18981,18982,18985],{},[169,18983,18984],{},"More involved deployment process",". Instead of deploying a single monolithic pipeline, multiple components (in our case, four) need to be deployed, monitored, and coordinated. This adds operational complexity during updates or rollbacks.",[148,18987,18988,18991],{},[169,18989,18990],{},"Higher storage requirements",". Storing intermediate results, such as the cleaned data, incurs additional storage costs. This trade-off needs to be considered, especially for high-volume streams.",[140,18993,18995],{"id":18994},"scaling","Scaling",[73,18997,18998],{},"As you may have noticed, the architecture described above uses Delta Lake as the storage layer for intermediate data. 
Delta Lake is a very convenient solution—it doesn't require deploying additional services and can operate directly over backends like S3. However, there are a few nuances worth keeping in mind.",[73,19000,19001],{},"One key consideration is how data is ingested and stored. In our case, data is read from Kafka with low latency and written to Delta Lake in batches. These writes happen fairly often; each commit generates a new Parquet file and an update to the transaction log. Over time, this results in many small files and metadata entries, which can eventually lead to performance degradation.",[73,19003,19004,19005,19008,19009,694],{},"This isn't a problem, but it requires proper handling. The main technique to manage this growth is partitioning - note that this differs from Kafka partitioning. In Delta Lake, partitioning involves designating one or more columns whose values determine the directory structure in which files are stored. For example, you can derive a \"day\" column from the timestamp field and use it as a partition key via the ",[3061,19006,19007],{},"partition_columns"," parameter in ",[3061,19010,19011],{},"pw.io.deltalake.write",[73,19013,19014,19015,19018],{},"Then, if the partitioning is based on a daily scale and the partitioning column is named ",[3061,19016,19017],{},"timestamp_day",", the output configuration would look as follows:",[3418,19020,19022],{"className":3420,"code":19021,"language":3422,"meta":23,"style":23},"pw.io.deltalake.write(\n    table,\n    \"\u002Fpath\u002Fto\u002Flake\",\n    partition_columns=[table.timestamp_day],  # enable partitioning by 
`timestamp_day`\n)\n",[3061,19023,19024,19042,19048,19059,19079],{"__ignoreMap":23},[1291,19025,19026,19028,19030,19032,19034,19036,19038,19040],{"class":3427,"line":3428},[1291,19027,3841],{"class":3431},[1291,19029,694],{"class":3435},[1291,19031,4078],{"class":3457},[1291,19033,694],{"class":3435},[1291,19035,18230],{"class":3457},[1291,19037,694],{"class":3435},[1291,19039,9700],{"class":3812},[1291,19041,3874],{"class":3435},[1291,19043,19044,19046],{"class":3427,"line":24},[1291,19045,18241],{"class":3812},[1291,19047,4107],{"class":3435},[1291,19049,19050,19052,19055,19057],{"class":3427,"line":675},[1291,19051,4382],{"class":3435},[1291,19053,19054],{"class":3439},"\u002Fpath\u002Fto\u002Flake",[1291,19056,3691],{"class":3435},[1291,19058,4107],{"class":3435},[1291,19060,19061,19064,19067,19069,19071,19073,19076],{"class":3427,"line":3542},[1291,19062,19063],{"class":3819},"    partition_columns",[1291,19065,19066],{"class":3435},"=[",[1291,19068,16104],{"class":3812},[1291,19070,694],{"class":3435},[1291,19072,19017],{"class":3457},[1291,19074,19075],{"class":3435},"],",[1291,19077,19078],{"class":3673},"  # enable partitioning by `timestamp_day`\n",[1291,19080,19081],{"class":3427,"line":3547},[1291,19082,3827],{"class":3435},[73,19084,19085],{},"Partitioning enables several optimizations: files within a partition can be compacted over time, reducing fragmentation and improving query performance. 
Similarly, the Delta transaction log can be periodically cleaned up - after data is compacted and old versions become obsolete, they can be safely removed.",[73,19087,19088],{},"These maintenance strategies allow the system to remain efficient and manageable, even when operating continuously over long timeframes - months or even years.",[140,19090,19092],{"id":19091},"going-further","Going further",[73,19094,19095],{},"The microservice-style architecture not only enabled the construction of the primary pipeline but also unlocked several opportunities to reuse computation results and build additional production services with minimal effort:",[145,19097,19098,19104,19110],{},[148,19099,19100,19103],{},[169,19101,19102],{},"Anomaly detection and alerting",". If the predictions deviate significantly from the ground truth, this may indicate an issue, such as a buggy deployment or unexpected data shift. A separate Pathway pipeline can monitor for anomalies and trigger alerts via a Slack connector or other notification channel.",[148,19105,19106,19109],{},[169,19107,19108],{},"Prediction model improvement",". Combining cleaned input data and ground truth values forms a high-quality dataset ideal for training improved prediction models. This data can be collected over time and used offline to refine and validate new approaches.",[148,19111,19112,19115],{},[169,19113,19114],{},"A\u002FB testing",". When experimenting with a new prediction strategy, running it in parallel with the existing one is easy. For example, transport units can be sharded by their IDs, assigning a fixed percentage to the stable model and the rest to the experimental one. Since the architecture supports running multiple pipelines simultaneously, this testing becomes straightforward.",[140,19117,8794],{"id":8793},[73,19119,19120],{},"Of course, microservice architecture is not a silver bullet. 
It should be adopted thoughtfully, carefully considering whether it's appropriate for the specific task at hand. When used wisely, it can be a powerful tool for simplifying complex data workflows and increasing system reliability.",[73,19122,19123],{},"This article uses a real-world example from our work with La Poste to show how a Pathway Live Data Framework pipeline can be effectively split into multiple microservices. You now know how to synchronize pipelines using Delta Lake for pipeline-to-pipeline communications, understand the trade-offs involved, and manage potential challenges such as intermediate data growth. With this foundation, you're better equipped to design scalable and maintainable streaming architectures with Pathway Live Data Framework.",[73,19125,19126,19127,19131,19132,694],{},"If you need any help with pipelining, feel free to message us on ",[77,19128,19130],{"href":3072,"rel":19129},[81],"Discord"," or submit a feature request on ",[77,19133,19135],{"href":8868,"rel":19134},[81],"GitHub Issues",[5019,19137,19138],{},"html pre.shiki code .sJ14y, html code.shiki .sJ14y{--shiki-default:#C792EA}html pre.shiki code .s5Dmg, html code.shiki .s5Dmg{--shiki-default:#FFCB6B}html pre.shiki code .sAklC, html code.shiki .sAklC{--shiki-default:#89DDFF}html pre.shiki code .s0W1g, html code.shiki .s0W1g{--shiki-default:#BABED8}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html pre.shiki code .s-wAU, html code.shiki .s-wAU{--shiki-default:#F07178}html pre.shiki code .sdLwU, html code.shiki .sdLwU{--shiki-default:#82AAFF}html pre.shiki code .sfyAc, 
html code.shiki .sfyAc{--shiki-default:#C3E88D}html pre.shiki code .s7ZW3, html code.shiki .s7ZW3{--shiki-default:#BABED8;--shiki-default-font-style:italic}html pre.shiki code .sx098, html code.shiki .sx098{--shiki-default:#F78C6C}html pre.shiki code .s6cf3, html code.shiki .s6cf3{--shiki-default:#89DDFF;--shiki-default-font-style:italic}html pre.shiki code .saEQR, html code.shiki .saEQR{--shiki-default:#676E95;--shiki-default-font-style:italic}",{"title":23,"searchDepth":24,"depth":24,"links":19140},[19141,19142,19143,19144,19150,19154,19155,19156],{"id":17870,"depth":24,"text":17871},{"id":17883,"depth":24,"text":17884},{"id":17915,"depth":24,"text":17916},{"id":17939,"depth":24,"text":17940,"children":19145},[19146,19147,19148,19149],{"id":17946,"depth":675,"text":17947},{"id":18272,"depth":675,"text":18273},{"id":18616,"depth":675,"text":18617},{"id":18706,"depth":675,"text":18707},{"id":18933,"depth":24,"text":18934,"children":19151},[19152,19153],{"id":18940,"depth":675,"text":18941},{"id":18967,"depth":675,"text":18968},{"id":18994,"depth":24,"text":18995},{"id":19091,"depth":24,"text":19092},{"id":8793,"depth":24,"text":8794},"This article shows you how La Poste dealt with complex computational pipelines by splitting them into different microservices using Pathway Live Data Framework. 
Such a microservice architecture improves performance and stability and allows for reusing the intermediate results in other processes.",{"layout":6061,"thumbnail":19159,"date":19161,"tags":19162,"keywords":19164,"hidden":35},{"src":19160,"provider":11,"contain":35},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fpathway-laposte-microservices-th.png","2025-05-15",[5047,19163],"data-pipeline",[19165,19166,19167,19168,19169,19170,19171,19172],"ETL","Microservices","Microservice architecture","Micropipelines","Delta tables","S3","La Poste","ETA","\u002Fframework\u002Fblog\u002Fpathway-laposte-microservices",{"title":17833,"description":19157},{"loc":19173},"framework\u002Fblog\u002F1020.pathway-laposte-microservices","C_D9G36n-bzVOosiy422hLrSjqJhUx_nCxU1NRyrR08",{"id":19179,"title":19180,"author":19181,"body":19182,"description":19509,"extension":27,"meta":19510,"navigation":35,"path":19515,"seo":19516,"sitemap":19517,"stem":19518,"__hash__":19519},"content\u002Fframework\u002Fblog\u002F1025.local-embeddings-batch-udfs.md","50× Faster Local Embeddings with Batch UDFs",{"id":5070,"url":5071,"name":5072,"description":5073,"img":10,"provider":11,"linkedin":5074},{"type":13,"value":19183,"toc":19502},[19184,19188,19191,19195,19203,19207,19221,19245,19249,19252,19260,19264,19267,19281,19288,19334,19337,19341,19352,19398,19404,19489,19492,19499],[68,19185,19187],{"id":19186},"how-we-improved-the-performance-of-the-local-embeddings-with-the-introduction-of-the-batch-udfs","How we improved the performance of the local embeddings with the introduction of the batch UDFs",[73,19189,19190],{},"Local embeddings are important for your real-time AI applications from RAG to similarity search. They let you generate vectors on your own infrastructure, keeping latency low and your data private. But if your pipeline embeds data one item at a time, it will struggle to keep up when the volume grows. We saw this bottleneck where embedding documents sequentially was far too slow. 
Our solution was to introduce batch UDFs, a change that makes local embedding generation 50× faster without sacrificing streaming performance. In this post, we’ll show you how we did it and what it means for your pipelines.",[3189,19192,19194],{"id":19193},"the-local-embeddings-performance-challenge","The Local Embeddings Performance Challenge",[73,19196,19197,19198,19202],{},"In production streaming systems, locally computed embeddings pose a unique performance challenge. Unlike cloud API embeddings (by providers like OpenAI, Cohere, etc.) which can batch requests internally, local embedding models typically process one item at a time. As a result, much of your hardware’s potential throughput goes unused. When you need to embed thousands of documents per second, sequential processing can grind the pipeline to a halt. This is especially problematic for use cases such as real-time semantic search or ",[77,19199,19201],{"href":19200},"\u002Fblog\u002Fretrieval-augmented-generation-beginners-guide-rag-apps","retrieval augmented generation (RAG)",", where new data must be indexed within seconds to stay useful.",[140,19204,19206],{"id":19205},"why-are-batch-udfs-needed","Why are batch UDFs needed?",[73,19208,19209,19210,19213,19214,19217,19218,19220],{},"The User Defined Functions (UDFs) in Pathway Live Data Framework are designed to operate on each row separately. For some operations, however, it makes sense to batch the computation, even though there would be no change in the result. An example of this is matrix multiplication vs matrix by vector multiplication - multiplication of two ",[3061,19211,19212],{},"n x n"," matrices is faster than ",[3061,19215,19216],{},"n"," multiplications of an ",[3061,19219,19212],{}," matrix by an n-length vector. 
So, when tasked with matrix by vector multiplication, it makes sense to combine all the vectors into one matrix, do a matrix multiplication and then extract results for each vector.",[73,19222,19223,19224,19227,19228,19233,19234,19238,19239,19244],{},"With the prominence of matrices in the machine learning models, these are natural candidates for getting an improvement from batching. While this doesn’t matter for API-based embedding models (they handle batching on the server side), we needed to add batching for locally computed embeddings. In our codebase, this meant enhancing the ",[77,19225,14532],{"href":19226},"\u002Fdevelopers\u002Fuser-guide\u002Fllm-xpack\u002Fembedders#sentencetransformerembedder"," (which uses the ",[77,19229,19232],{"href":19230,"rel":19231},"https:\u002F\u002Fsbert.net\u002F",[81],"Sentence Transformers library"," to generate embeddings) and ",[77,19235,19237],{"href":19236},"\u002Fdevelopers\u002Ftemplates\u002Frag-customization\u002Fllm-chats#hugging-face-pipeline","HFPipelineChat"," (a wrapper around the ",[77,19240,19243],{"href":19241,"rel":19242},"https:\u002F\u002Fhuggingface.co\u002Fdocs\u002Ftransformers\u002Fen\u002Findex",[81],"Transformers library",").",[140,19246,19248],{"id":19247},"what-we-did-in-pathway-live-data-framework","What we did in Pathway Live Data Framework",[73,19250,19251],{},"This motivated us to expand our UDFs, which led to the introduction of batch UDFs. When these are used, the engine allows the UDF to send multiple rows of data simultaneously, with the expectation that the UDF will operate on lists of data points. 
Furthermore, the changes made for the sake of UDFs are more general and allow us to optimize other operators that can benefit from batching.",[73,19253,19254,19255,19259],{},"As Pathway Live Data Framework is written with time consistency in mind, the UDF batching is only possible for rows that have the same ",[77,19256,19258],{"href":19257},"\u002Fdevelopers\u002Fuser-guide\u002Fintroduction\u002Fconcepts#event-time-vs-processing-time","processing time",". This, however, is not a drawback, since with only single points of data coming to the pipeline, the improved performance is not needed.",[140,19261,19263],{"id":19262},"benchmarking-performance-of-local-embeddings","Benchmarking performance of local embeddings",[73,19265,19266],{},"We ran two tests to measure how batch UDFs accelerate local embedding generation:",[665,19268,19269,19275],{},[148,19270,19271,19274],{},[169,19272,19273],{},"1000 sentences"," – embedding 1,000 diverse sentences generated by an LLM.",[148,19276,19277,19280],{},[169,19278,19279],{},"575 Wikipedia articles"," – embedding a collection of Wikipedia articles (about 3 million tokens in total).",[73,19282,19283,19284,19287],{},"Each test was run using the ",[3061,19285,19286],{},"intfloat\u002Fe5-large-v2"," model with three settings: no batching, a batch size of 32, and a batch size of 1024 (large enough to process all items at once: 1000 for sentences, 575 for articles). 
The table below shows the total embedding time in seconds for each scenario:",[16104,19289,19290,19305],{},[16107,19291,19292],{},[16110,19293,19294,19296,19299,19302],{},[16113,19295],{},[16113,19297,19298],{},"without batching",[16113,19300,19301],{},"batches of size 32",[16113,19303,19304],{},"batches of size 1024",[16162,19306,19307,19321],{},[16110,19308,19309,19312,19315,19318],{},[16167,19310,19311],{},"articles on wikipedia",[16167,19313,19314],{},"647.716",[16167,19316,19317],{},"342.236",[16167,19319,19320],{},"239.721",[16110,19322,19323,19325,19328,19331],{},[16167,19324,19273],{},[16167,19326,19327],{},"2536.602",[16167,19329,19330],{},"130.530",[16167,19332,19333],{},"43.608",[73,19335,19336],{},"As you can see, especially for the sentences, which are mostly consistent in length, the improvement is immense, with computations without batches taking over 50 times more time than with batches of size 1024.",[140,19338,19340],{"id":19339},"how-to-use-batch-udfs","How to use Batch UDFs",[73,19342,19343,19344,19347,19348,19351],{},"While the introduction of the batch UDFs was motivated by its application to RAGs, these are much more general and you can use them anywhere you expect to get a performance improvement from batching. 
To do that, set the ",[3061,19345,19346],{},"max_batch_size"," in the ",[3061,19349,19350],{},"pw.udf"," decorator:",[3418,19353,19355],{"className":3420,"code":19354,"language":3422,"meta":23,"style":23},"@pw.udf(max_batch_size=32)\ndef batched_udf(...):\n  ...\n",[3061,19356,19357,19380,19393],{"__ignoreMap":23},[1291,19358,19359,19362,19364,19366,19369,19371,19373,19375,19378],{"class":3427,"line":3428},[1291,19360,19361],{"class":3435},"@",[1291,19363,3841],{"class":3812},[1291,19365,694],{"class":3435},[1291,19367,19368],{"class":3812},"udf",[1291,19370,3816],{"class":3435},[1291,19372,19346],{"class":3819},[1291,19374,3738],{"class":3435},[1291,19376,19377],{"class":3451},"32",[1291,19379,3827],{"class":3435},[1291,19381,19382,19384,19387,19389,19391],{"class":3427,"line":24},[1291,19383,11398],{"class":7739},[1291,19385,19386],{"class":3812}," batched_udf",[1291,19388,3816],{"class":3435},[1291,19390,1293],{"class":3431},[1291,19392,11948],{"class":3435},[1291,19394,19395],{"class":3427,"line":675},[1291,19396,19397],{"class":3431},"  ...\n",[73,19399,19400,19401,19403],{},"or in the ",[3061,19402,9410],{}," constructor",[3418,19405,19407],{"className":3420,"code":19406,"language":3422,"meta":23,"style":23},"class BatchedUDF(pw.UDF):\n  def __init__(self):\n    super().__init__(max_batch_size=32)\n  \n  # implementation of udf\n  def __wrapped(self, ...)\n",[3061,19408,19409,19426,19442,19462,19466,19471],{"__ignoreMap":23},[1291,19410,19411,19413,19416,19418,19420,19422,19424],{"class":3427,"line":3428},[1291,19412,16356],{"class":7739},[1291,19414,19415],{"class":6356}," BatchedUDF",[1291,19417,3816],{"class":3435},[1291,19419,3841],{"class":6356},[1291,19421,694],{"class":3435},[1291,19423,9410],{"class":3457},[1291,19425,11948],{"class":3435},[1291,19427,19428,19431,19434,19436,19440],{"class":3427,"line":24},[1291,19429,19430],{"class":7739},"  def",[1291,19432,19433],{"class":3812}," 
__init__",[1291,19435,3816],{"class":3435},[1291,19437,19439],{"class":19438},"st21m","self",[1291,19441,11948],{"class":3435},[1291,19443,19444,19447,19449,19452,19454,19456,19458,19460],{"class":3427,"line":675},[1291,19445,19446],{"class":6356},"    super",[1291,19448,10341],{"class":3435},[1291,19450,19451],{"class":3812},"__init__",[1291,19453,3816],{"class":3435},[1291,19455,19346],{"class":3819},[1291,19457,3738],{"class":3435},[1291,19459,19377],{"class":3451},[1291,19461,3827],{"class":3435},[1291,19463,19464],{"class":3427,"line":3542},[1291,19465,8054],{"class":3431},[1291,19467,19468],{"class":3427,"line":3547},[1291,19469,19470],{"class":3673},"  # implementation of udf\n",[1291,19472,19473,19475,19478,19480,19482,19484,19487],{"class":3427,"line":3572},[1291,19474,19430],{"class":7739},[1291,19476,19477],{"class":3812}," __wrapped",[1291,19479,3816],{"class":3435},[1291,19481,19439],{"class":19438},[1291,19483,3566],{"class":3435},[1291,19485,19486],{"class":3431}," ...",[1291,19488,3827],{"class":3435},[73,19490,19491],{},"and change your UDF function to operate on lists of arguments.",[73,19493,19494,19495,694],{},"More details on using the batch UDFs are in the ",[77,19496,19498],{"href":19497},"\u002Fdevelopers\u002Fuser-guide\u002Fdata-transformation\u002Fuser-defined-functions#batch-udfs","dedicated guide on the UDFs",[5019,19500,19501],{},"html pre.shiki code .sAklC, html code.shiki .sAklC{--shiki-default:#89DDFF}html pre.shiki code .sdLwU, html code.shiki .sdLwU{--shiki-default:#82AAFF}html pre.shiki code .s7ZW3, html code.shiki .s7ZW3{--shiki-default:#BABED8;--shiki-default-font-style:italic}html pre.shiki code .sx098, html code.shiki .sx098{--shiki-default:#F78C6C}html pre.shiki code .sJ14y, html code.shiki .sJ14y{--shiki-default:#C792EA}html pre.shiki code .s0W1g, html code.shiki .s0W1g{--shiki-default:#BABED8}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: 
var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html pre.shiki code .s5Dmg, html code.shiki .s5Dmg{--shiki-default:#FFCB6B}html pre.shiki code .s-wAU, html code.shiki .s-wAU{--shiki-default:#F07178}html pre.shiki code .st21m, html code.shiki .st21m{--shiki-default:#F07178;--shiki-default-font-style:italic}html pre.shiki code .saEQR, html code.shiki .saEQR{--shiki-default:#676E95;--shiki-default-font-style:italic}",{"title":23,"searchDepth":24,"depth":24,"links":19503},[19504,19505,19506,19507,19508],{"id":19193,"depth":675,"text":19194},{"id":19205,"depth":24,"text":19206},{"id":19247,"depth":24,"text":19248},{"id":19262,"depth":24,"text":19263},{"id":19339,"depth":24,"text":19340},"Discover how batch UDFs supercharged local embedding generation—achieving up to 50× faster performance in real-time pipelines.",{"layout":90,"thumbnail":19511,"date":19513,"tags":19514,"hidden":35},{"src":19512,"provider":11,"contain":35},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fbatch-udfs.png","2025-07-17",[90,6268],"\u002Fframework\u002Fblog\u002Flocal-embeddings-batch-udfs",{"title":19180,"description":19509},{"loc":19515},"framework\u002Fblog\u002F1025.local-embeddings-batch-udfs","XQtjAa07fm0kdYn0QO4jNZouZzkOqNYTjVH7id8FeR4",{"id":19521,"title":19522,"author":19523,"body":19529,"description":23,"extension":27,"meta":20600,"navigation":35,"path":20606,"seo":20607,"sitemap":20608,"stem":20609,"__hash__":20610},"content\u002Fframework\u002Fblog\u002F699.paddleocr.md","Real-Time OCR with PaddleOCR and Pathway Live Data Framework",{"id":19524,"url":19525,"name":19526,"description":5073,"img":19527,"provider":11,"linkedin":19528},"olivier","olivier-ruas","Olivier 
Ruas","\u002Fassets\u002Fpictures\u002Fimage_ruas_pathway.jpg","https:\u002F\u002Fwww.linkedin.com\u002Fin\u002Foruas\u002F",{"type":13,"value":19530,"toc":20583},[19531,19534,19537,19549,19553,19556,19559,19573,19576,19580,19583,19586,19592,19596,19599,19610,19614,19625,19643,19650,19653,19660,19685,19693,19697,19706,19760,19765,19844,19852,19856,19859,19863,19866,19870,19873,19885,19890,19898,19902,19910,19995,20000,20066,20070,20073,20111,20115,20122,20125,20342,20345,20385,20388,20391,20395,20398,20410,20416,20503,20510,20517,20521,20539,20562,20565,20572,20574,20577,20580],[68,19532,19522],{"id":19533},"real-time-ocr-with-paddleocr-and-pathway-live-data-framework",[73,19535,19536],{},"Optical Character Recognition (OCR) is a key step in extracting information from documents, whether for data processing or RAG applications.\nWhen dealing with real-time data, the ability to perform OCR on the fly becomes essential.",[73,19538,19539,19540,3126,19545,19548],{},"OCR and real-time processing each present their own challenges.\nThis guide shows how ",[77,19541,19544],{"href":19542,"rel":19543},"https:\u002F\u002Faistudio.baidu.com\u002Fpaddleocr",[81],"PaddleOCR",[77,19546,1279],{"href":19547},"\u002Fframework\u002F"," simplify real-time OCR.\nYou'll learn how to process local files with OCR and integrate the results into a RAG pipeline.",[140,19550,19552],{"id":19551},"why-real-time-ocr-matters","Why Real-Time OCR Matters",[73,19554,19555],{},"Data processing is often boiled down to simple acronyms like ETL (Extract, Transform, Load) or the medallion architecture.\nOn paper, it sounds straightforward: extract the data, process it, and store the results. But in reality, it's rarely that simple.",[73,19557,19558],{},"The challenge lies in the data itself. It's not static or uniform. 
It's messy, dynamic, and constantly evolving:",[145,19560,19561,19567],{},[148,19562,19563,19566],{},[169,19564,19565],{},"Diverse formats",": Documents can be PDFs, images, slides, or other unstructured formats.",[148,19568,19569,19572],{},[169,19570,19571],{},"Volume and velocity",": Data is continuously created, updated, or deleted, and its volume and speed vary depending on the source.",[73,19574,19575],{},"In practice, you won't be dealing with neat, static CSV files.\nYou need a solution that can extract not just text, but also the structure of the data.\nThis processing must be done efficiently, in real-time, to keep up with the incoming data updates.\nThat's where real-time OCR with Pathway and PaddleOCR comes in, offering a practical way to handle your data as it arrives.",[140,19577,19579],{"id":19578},"how-paddleocr-and-pathway-live-data-framework-work-together","How PaddleOCR and Pathway Live Data Framework Work Together",[73,19581,19582],{},"PaddleOCR, developed by Baidu, is a robust OCR system that supports advanced algorithms for data generation, model training, and inference.\nIt converts documents and images into structured formats like JSON or Markdown, and its GPU compatibility ensures high performance, which makes it ideal for real-time use.",[73,19584,19585],{},"By integrating PaddleOCR into Pathway Live Data Framework pipelines, you can process documents as they arrive, without delays.",[1141,19587],{":zoomable":1143,"alt":19588,"src":19589,"style":19590,"className":19591},"PaddleOCR integration in a Pathway Live Data Framework pipeline","\u002Fassets\u002Fcontent\u002Fblog\u002Fpaddleocr\u002Fpaddleocr-pipeline.svg","height: 260px",[133],[140,19593,19595],{"id":19594},"installation-and-setup","Installation and Setup",[73,19597,19598],{},"Before starting, you need to do the following:",[665,19600,19601,19607],{},[148,19602,19603,19604],{},"Install the Parsing xpack, ",[3061,19605,19606],{},"pip install 
\"pathway[xpack-llm-docs]\"",[148,19608,19609],{},"Install PaddleOCR",[3189,19611,19613],{"id":19612},"install-pathways-parsing-xpack","Install Pathway's Parsing xpack",[73,19615,19616,19617,19620,19621,19624],{},"PaddleOCR will be called using ",[3061,19618,19619],{},"PaddleOCRParser",", available in the ",[3061,19622,19623],{},"xpack-llm-docs"," xpack:",[3418,19626,19628],{"className":6347,"code":19627,"language":6349,"meta":23,"style":23},"pip install \"pathway[xpack-llm-docs]\"\n",[3061,19629,19630],{"__ignoreMap":23},[1291,19631,19632,19634,19636,19638,19641],{"class":3427,"line":3428},[1291,19633,6357],{"class":6356},[1291,19635,6360],{"class":3439},[1291,19637,3705],{"class":3435},[1291,19639,19640],{"class":3439},"pathway[xpack-llm-docs]",[1291,19642,3746],{"class":3435},[73,19644,19645,19646,694],{},"For more details, see the  ",[77,19647,19649],{"href":19648},"\u002Fdevelopers\u002Fuser-guide\u002Fintroduction\u002Finstallation\u002F","Pathway installation guide",[3189,19651,19609],{"id":19652},"install-paddleocr",[73,19654,19655,19656,19659],{},"PaddleOCR requires ",[3061,19657,19658],{},"paddlepaddle",". 
The installation depends on your hardware.\nIf you want to run the OCR on CPU, you can install it with the following pip command:",[3418,19661,19663],{"className":6347,"code":19662,"language":6349,"meta":23,"style":23},"pip install paddlepaddle>=3.2.0\n",[3061,19664,19665],{"__ignoreMap":23},[1291,19666,19667,19669,19671,19674,19677,19680,19682],{"class":3427,"line":3428},[1291,19668,6357],{"class":6356},[1291,19670,6360],{"class":3439},[1291,19672,19673],{"class":3439}," paddlepaddl",[1291,19675,19676],{"class":3431},"e",[1291,19678,19679],{"class":3435},">",[1291,19681,3738],{"class":3439},[1291,19683,19684],{"class":3451},"3.2.0\n",[73,19686,19687,19688,694],{},"For GPU support, follow the instructions on the ",[77,19689,19692],{"href":19690,"rel":19691},"https:\u002F\u002Fwww.paddlepaddle.org.cn\u002Fen\u002Finstall\u002Fquick",[81],"official site",[140,19694,19696],{"id":19695},"using-paddleocr-in-pathway-live-data-framework","Using PaddleOCR in Pathway Live Data Framework",[73,19698,19699,19700,19705],{},"The Pathway Live Data Framework provides the ",[77,19701,19703],{"href":19702},"\u002Fdevelopers\u002Fapi-docs\u002Fpathway-xpacks-llm\u002Fparsers\u002F#pathway.xpacks.llm.parsers.PaddleOCRParser",[3061,19704,19619],{}," to parse images, PDFs, and PPTX slides.",[3418,19707,19709],{"className":3420,"code":19708,"language":3422,"meta":23,"style":23},"from pathway.xpacks.llm.parsers import (\n    PaddleOCRParser,\n)\n\nparser = PaddleOCRParser()\n",[3061,19710,19711,19734,19741,19745,19749],{"__ignoreMap":23},[1291,19712,19713,19715,19717,19719,19721,19723,19725,19727,19730,19732],{"class":3427,"line":3428},[1291,19714,3550],{"class":3475},[1291,19716,3553],{"class":3431},[1291,19718,694],{"class":3435},[1291,19720,3581],{"class":3431},[1291,19722,694],{"class":3435},[1291,19724,3627],{"class":3431},[1291,19726,694],{"class":3435},[1291,19728,19729],{"class":3431},"parsers 
",[1291,19731,3476],{"class":3475},[1291,19733,6086],{"class":3435},[1291,19735,19736,19739],{"class":3427,"line":24},[1291,19737,19738],{"class":3431},"    PaddleOCRParser",[1291,19740,4107],{"class":3435},[1291,19742,19743],{"class":3427,"line":675},[1291,19744,3827],{"class":3435},[1291,19746,19747],{"class":3427,"line":3542},[1291,19748,3526],{"emptyLinePlaceholder":35},[1291,19750,19751,19753,19755,19758],{"class":3427,"line":3547},[1291,19752,4522],{"class":3431},[1291,19754,3738],{"class":3435},[1291,19756,19757],{"class":3812}," PaddleOCRParser",[1291,19759,4871],{"class":3435},[73,19761,19762,19764],{},[3061,19763,19619],{}," accepts the following parameters:",[145,19766,19767,19782,19791,19801,19807,19816,19829],{},[148,19768,19769,19772,19773,3126,19775,19778,19779,19781],{},[3061,19770,19771],{},"pipeline",": The Paddle pipeline object you want to use to do the OCR. Currently, ",[3061,19774,19544],{},[3061,19776,19777],{},"PPStructureV3"," pipelines are supported. By default, it uses a ",[3061,19780,19777],{}," pipeline.",[148,19783,19784,19787,19788,694],{},[3061,19785,19786],{},"concatenate_pages",": Whether to concatenate multi-paged documents into a single output. Defaults to ",[3061,19789,19790],{},"False",[148,19792,19793,19796,19797,19800],{},[3061,19794,19795],{},"intermediate_image_format",": Intermediate image format used when converting PDFs to images. Defaults to ",[3061,19798,19799],{},"\"jpg\""," for speed and memory use.",[148,19802,19803,19806],{},[3061,19804,19805],{},"max_image_size",": Maximum allowed size of the images in bytes. Default is 15 MB.",[148,19808,19809,19812,19813,694],{},[3061,19810,19811],{},"downsize_horizontal_width",": Width to which images are downsized if necessary, defaults to ",[3061,19814,19815],{},"1920",[148,19817,19818,19821,19822,19828],{},[3061,19819,19820],{},"cache_strategy",": Defines the caching mechanism. 
To enable caching, a valid ",[77,19823,19825],{"href":19824},"\u002Fdevelopers\u002Fapi-docs\u002Fudfs#pathway.udfs.CacheStrategy",[3061,19826,19827],{},"pathway.udfs.CacheStrategy"," should be provided. Defaults to None.",[148,19830,19831,19834,19835,5681,19838,19841,19842,694],{},[3061,19832,19833],{},"async_mode",": The execution mode of the OCR, either ",[3061,19836,19837],{},"batch_async",[3061,19839,19840],{},"fully_async",". Defaults to ",[3061,19843,19837],{},[73,19845,19846,19847,694],{},"For more details, see the ",[77,19848,19849],{"href":19702},[3061,19850,19851],{},"PaddleOCRParser API reference",[140,19853,19855],{"id":19854},"example-simple-ocr-pipeline","Example: Simple OCR pipeline",[73,19857,19858],{},"Let's process a local image and extract its text.",[3189,19860,19862],{"id":19861},"prepare-the-data","Prepare the Data",[73,19864,19865],{},"You need some document to parse.\nAs an example, let's use a screenshot of a shopping list:",[1141,19867],{":zoomable":1143,"alt":19868,"src":19869,"style":133},"Shopping List","\u002Fassets\u002Fcontent\u002Fblog\u002Fpaddleocr\u002Fshopping_list.png",[73,19871,19872],{},"You can download this image with the following command:",[3418,19874,19876],{"className":6347,"code":19875,"language":6349,"meta":23,"style":23},"wget https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fpathway\u002Ftree\u002Fmain\u002Fexamples\u002Fimages\u002Fshopping_list.png\n",[3061,19877,19878],{"__ignoreMap":23},[1291,19879,19880,19882],{"class":3427,"line":3428},[1291,19881,7203],{"class":6356},[1291,19883,19884],{"class":3439}," https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fpathway\u002Ftree\u002Fmain\u002Fexamples\u002Fimages\u002Fshopping_list.png\n",[73,19886,19887,19888,5305],{},"Put it into a ",[3061,19889,4102],{},[73,19891,19892,19893,19897],{},"Note: We use OCR on such a screenshot for the sake of the example here.\nIt's best to avoid working with screenshots and to work directly with the raw data using APIs and the 
",[77,19894,19896],{"href":19895},"\u002Fdevelopers\u002Fuser-guide\u002Fconnect\u002Fconnectors\u002Fcustom-python-connectors","Python connector"," when it is possible.\nUse OCR only when accessing the raw data is impossible, such as in an enterprise documentation when there are only PDFs and images.",[3189,19899,19901],{"id":19900},"read-and-parse-the-file","Read and Parse the file",[73,19903,19904,19905,19909],{},"Use the ",[77,19906,19908],{"href":19907},"\u002Fdevelopers\u002Fuser-guide\u002Fconnect\u002Fconnectors\u002Ffs-connector","file system connector"," to read the file in binary format:",[3418,19911,19913],{"className":3420,"code":19912,"language":3422,"meta":23,"style":23},"files_table = pw.io.fs.read(\n    \".\u002Fdata\",\n    format=\"binary\",\n    mode=\"static\",\n    object_size_limit=None,\n    with_metadata=True,\n)\n",[3061,19914,19915,19938,19948,19962,19977,19985,19991],{"__ignoreMap":23},[1291,19916,19917,19920,19922,19924,19926,19928,19930,19932,19934,19936],{"class":3427,"line":3428},[1291,19918,19919],{"class":3431},"files_table 
",[1291,19921,3738],{"class":3435},[1291,19923,4073],{"class":3431},[1291,19925,694],{"class":3435},[1291,19927,4078],{"class":3457},[1291,19929,694],{"class":3435},[1291,19931,4083],{"class":3457},[1291,19933,694],{"class":3435},[1291,19935,4088],{"class":3812},[1291,19937,3874],{"class":3435},[1291,19939,19940,19942,19944,19946],{"class":3427,"line":24},[1291,19941,4382],{"class":3435},[1291,19943,3928],{"class":3439},[1291,19945,3691],{"class":3435},[1291,19947,4107],{"class":3435},[1291,19949,19950,19952,19954,19956,19958,19960],{"class":3427,"line":675},[1291,19951,4112],{"class":3819},[1291,19953,3738],{"class":3435},[1291,19955,3691],{"class":3435},[1291,19957,4119],{"class":3439},[1291,19959,3691],{"class":3435},[1291,19961,4107],{"class":3435},[1291,19963,19964,19966,19968,19970,19973,19975],{"class":3427,"line":3542},[1291,19965,5431],{"class":3819},[1291,19967,3738],{"class":3435},[1291,19969,3691],{"class":3435},[1291,19971,19972],{"class":3439},"static",[1291,19974,3691],{"class":3435},[1291,19976,4107],{"class":3435},[1291,19978,19979,19982],{"class":3427,"line":3547},[1291,19980,19981],{"class":3819},"    object_size_limit",[1291,19983,19984],{"class":3435},"=None,\n",[1291,19986,19987,19989],{"class":3427,"line":3572},[1291,19988,4128],{"class":3819},[1291,19990,4131],{"class":3435},[1291,19992,19993],{"class":3427,"line":3614},[1291,19994,3827],{"class":3435},[73,19996,19997,19998,4390],{},"Parse the file with ",[3061,19999,19619],{},[3418,20001,20003],{"className":3420,"code":20002,"language":3422,"meta":23,"style":23},"parser = PaddleOCRParser(concatenate_pages=True)\nparsed_table = 
files_table.select(parsed_text=parser(pw.this.data)[0][0])\n",[3061,20004,20005,20019],{"__ignoreMap":23},[1291,20006,20007,20009,20011,20013,20015,20017],{"class":3427,"line":3428},[1291,20008,4522],{"class":3431},[1291,20010,3738],{"class":3435},[1291,20012,19757],{"class":3812},[1291,20014,3816],{"class":3435},[1291,20016,19786],{"class":3819},[1291,20018,5645],{"class":3435},[1291,20020,20021,20024,20026,20029,20031,20033,20035,20038,20040,20042,20044,20046,20048,20050,20052,20054,20057,20059,20062,20064],{"class":3427,"line":24},[1291,20022,20023],{"class":3431},"parsed_table ",[1291,20025,3738],{"class":3435},[1291,20027,20028],{"class":3431}," files_table",[1291,20030,694],{"class":3435},[1291,20032,16571],{"class":3812},[1291,20034,3816],{"class":3435},[1291,20036,20037],{"class":3819},"parsed_text",[1291,20039,3738],{"class":3435},[1291,20041,4621],{"class":3812},[1291,20043,3816],{"class":3435},[1291,20045,3841],{"class":3812},[1291,20047,694],{"class":3435},[1291,20049,16845],{"class":3457},[1291,20051,694],{"class":3435},[1291,20053,3935],{"class":3457},[1291,20055,20056],{"class":3435},")[",[1291,20058,9555],{"class":3451},[1291,20060,20061],{"class":3435},"][",[1291,20063,9555],{"class":3451},[1291,20065,9572],{"class":3435},[3189,20067,20069],{"id":20068},"save-the-results","Save the Results",[73,20071,20072],{},"Write the output to a JSON Lines file:",[3418,20074,20076],{"className":3420,"code":20075,"language":3422,"meta":23,"style":23},"pw.io.jsonlines.write(parsed_table, 
\".\u002Foutput.jsonl\")\n",[3061,20077,20078],{"__ignoreMap":23},[1291,20079,20080,20082,20084,20086,20088,20091,20093,20095,20097,20100,20102,20104,20107,20109],{"class":3427,"line":3428},[1291,20081,3841],{"class":3431},[1291,20083,694],{"class":3435},[1291,20085,4078],{"class":3457},[1291,20087,694],{"class":3435},[1291,20089,20090],{"class":3457},"jsonlines",[1291,20092,694],{"class":3435},[1291,20094,9700],{"class":3812},[1291,20096,3816],{"class":3435},[1291,20098,20099],{"class":3812},"parsed_table",[1291,20101,3566],{"class":3435},[1291,20103,3705],{"class":3435},[1291,20105,20106],{"class":3439},".\u002Foutput.jsonl",[1291,20108,3691],{"class":3435},[1291,20110,3827],{"class":3435},[3189,20112,20114],{"id":20113},"run-the-pipeline","Run the Pipeline",[73,20116,20117,20118,20121],{},"Don't forget the ",[3061,20119,20120],{},"pw.run()",", and that's it!",[73,20123,20124],{},"Here is the entire pipeline:",[3418,20126,20128],{"className":3420,"code":20127,"language":3422,"meta":23,"style":23},"import pathway as pw\nfrom pathway.xpacks.llm.parsers import (\n    PaddleOCRParser,\n)\n\nfiles_table = pw.io.fs.read(\n    \".\u002Fdata\",\n    format=\"binary\",\n    object_size_limit=None,\n    with_metadata=True,\n)\n\nparser = PaddleOCRParser(concatenate_pages=True)\nparsed_table = files_table.select(parsed_text=parser(pw.this.data)[0][0])\n\npw.io.jsonlines.write(parsed_table, 
\".\u002Foutput.jsonl\")\npw.run()\n",[3061,20129,20130,20140,20162,20168,20172,20176,20198,20208,20222,20228,20234,20238,20242,20256,20298,20302,20332],{"__ignoreMap":23},[1291,20131,20132,20134,20136,20138],{"class":3427,"line":3428},[1291,20133,3476],{"class":3475},[1291,20135,3533],{"class":3431},[1291,20137,3536],{"class":3475},[1291,20139,3539],{"class":3431},[1291,20141,20142,20144,20146,20148,20150,20152,20154,20156,20158,20160],{"class":3427,"line":24},[1291,20143,3550],{"class":3475},[1291,20145,3553],{"class":3431},[1291,20147,694],{"class":3435},[1291,20149,3581],{"class":3431},[1291,20151,694],{"class":3435},[1291,20153,3627],{"class":3431},[1291,20155,694],{"class":3435},[1291,20157,19729],{"class":3431},[1291,20159,3476],{"class":3475},[1291,20161,6086],{"class":3435},[1291,20163,20164,20166],{"class":3427,"line":675},[1291,20165,19738],{"class":3431},[1291,20167,4107],{"class":3435},[1291,20169,20170],{"class":3427,"line":3542},[1291,20171,3827],{"class":3435},[1291,20173,20174],{"class":3427,"line":3547},[1291,20175,3526],{"emptyLinePlaceholder":35},[1291,20177,20178,20180,20182,20184,20186,20188,20190,20192,20194,20196],{"class":3427,"line":3572},[1291,20179,19919],{"class":3431},[1291,20181,3738],{"class":3435},[1291,20183,4073],{"class":3431},[1291,20185,694],{"class":3435},[1291,20187,4078],{"class":3457},[1291,20189,694],{"class":3435},[1291,20191,4083],{"class":3457},[1291,20193,694],{"class":3435},[1291,20195,4088],{"class":3812},[1291,20197,3874],{"class":3435},[1291,20199,20200,20202,20204,20206],{"class":3427,"line":3614},[1291,20201,4382],{"class":3435},[1291,20203,3928],{"class":3439},[1291,20205,3691],{"class":3435},[1291,20207,4107],{"class":3435},[1291,20209,20210,20212,20214,20216,20218,20220],{"class":3427,"line":3640},[1291,20211,4112],{"class":3819},[1291,20213,3738],{"class":3435},[1291,20215,3691],{"class":3435},[1291,20217,4119],{"class":3439},[1291,20219,3691],{"class":3435},[1291,20221,4107],{"class":3435},[1291,20223,20224,2
0226],{"class":3427,"line":3665},[1291,20225,19981],{"class":3819},[1291,20227,19984],{"class":3435},[1291,20229,20230,20232],{"class":3427,"line":3670},[1291,20231,4128],{"class":3819},[1291,20233,4131],{"class":3435},[1291,20235,20236],{"class":3427,"line":3677},[1291,20237,3827],{"class":3435},[1291,20239,20240],{"class":3427,"line":3877},[1291,20241,3526],{"emptyLinePlaceholder":35},[1291,20243,20244,20246,20248,20250,20252,20254],{"class":3427,"line":3916},[1291,20245,4522],{"class":3431},[1291,20247,3738],{"class":3435},[1291,20249,19757],{"class":3812},[1291,20251,3816],{"class":3435},[1291,20253,19786],{"class":3819},[1291,20255,5645],{"class":3435},[1291,20257,20258,20260,20262,20264,20266,20268,20270,20272,20274,20276,20278,20280,20282,20284,20286,20288,20290,20292,20294,20296],{"class":3427,"line":4519},[1291,20259,20023],{"class":3431},[1291,20261,3738],{"class":3435},[1291,20263,20028],{"class":3431},[1291,20265,694],{"class":3435},[1291,20267,16571],{"class":3812},[1291,20269,3816],{"class":3435},[1291,20271,20037],{"class":3819},[1291,20273,3738],{"class":3435},[1291,20275,4621],{"class":3812},[1291,20277,3816],{"class":3435},[1291,20279,3841],{"class":3812},[1291,20281,694],{"class":3435},[1291,20283,16845],{"class":3457},[1291,20285,694],{"class":3435},[1291,20287,3935],{"class":3457},[1291,20289,20056],{"class":3435},[1291,20291,9555],{"class":3451},[1291,20293,20061],{"class":3435},[1291,20295,9555],{"class":3451},[1291,20297,9572],{"class":3435},[1291,20299,20300],{"class":3427,"line":6038},[1291,20301,3526],{"emptyLinePlaceholder":35},[1291,20303,20304,20306,20308,20310,20312,20314,20316,20318,20320,20322,20324,20326,20328,20330],{"class":3427,"line":6043},[1291,20305,3841],{"class":3431},[1291,20307,694],{"class":3435},[1291,20309,4078],{"class":3457},[1291,20311,694],{"class":3435},[1291,20313,20090],{"class":3457},[1291,20315,694],{"class":3435},[1291,20317,9700],{"class":3812},[1291,20319,3816],{"class":3435},[1291,20321,20099],{"class":3812
},[1291,20323,3566],{"class":3435},[1291,20325,3705],{"class":3435},[1291,20327,20106],{"class":3439},[1291,20329,3691],{"class":3435},[1291,20331,3827],{"class":3435},[1291,20333,20334,20336,20338,20340],{"class":3427,"line":6066},[1291,20335,3841],{"class":3431},[1291,20337,694],{"class":3435},[1291,20339,11274],{"class":3812},[1291,20341,4871],{"class":3435},[73,20343,20344],{},"The output will look like this:",[3418,20346,20348],{"className":6347,"code":20347,"language":6349,"meta":23,"style":23},"{\"parsed_text\":[[\"\\n\\n# Shopping List \\n\\nMilk Bread Eggs \",{}]],\"diff\":1,\"time\":1770035995376}\n",[3061,20349,20350],{"__ignoreMap":23},[1291,20351,20352,20354,20357,20359,20362,20365,20367,20370,20373,20376,20379,20382],{"class":3427,"line":3428},[1291,20353,8770],{"class":3435},[1291,20355,20356],{"class":6356},"\"parsed_text\"",[1291,20358,4390],{"class":3812},[1291,20360,20361],{"class":3439},"[[",[1291,20363,20364],{"class":6356},"\"\\n\\n# Shopping List \\n\\nMilk Bread Eggs \"",[1291,20366,3566],{"class":6356},[1291,20368,20369],{"class":3439},"{}",[1291,20371,20372],{"class":6356},"]],",[1291,20374,20375],{"class":6356},"\"diff\"",[1291,20377,20378],{"class":6356},":1,",[1291,20380,20381],{"class":6356},"\"time\"",[1291,20383,20384],{"class":6356},":1770035995376}\n",[73,20386,20387],{},"You can see that the text is successfully extracted from the shopping list!",[73,20389,20390],{},"Unfortunately, it seems that this list is incomplete, so you update the list, and take a new screenshot:",[1141,20392],{":zoomable":1143,"alt":20393,"src":20394,"style":133},"Updated shopping List","\u002Fassets\u002Fcontent\u002Fblog\u002Fpaddleocr\u002Fshopping_list_2.png",[73,20396,20397],{},"As previously, you can download this image with the following command:",[3418,20399,20401],{"className":6347,"code":20400,"language":6349,"meta":23,"style":23},"wget 
https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fpathway\u002Ftree\u002Fmain\u002Fexamples\u002Fimages\u002Fshopping_list_2.png\n",[3061,20402,20403],{"__ignoreMap":23},[1291,20404,20405,20407],{"class":3427,"line":3428},[1291,20406,7203],{"class":6356},[1291,20408,20409],{"class":3439}," https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fpathway\u002Ftree\u002Fmain\u002Fexamples\u002Fimages\u002Fshopping_list_2.png\n",[73,20411,20412,20413,20415],{},"Rename it with the same name as the previous one, and put it in the same ",[3061,20414,4102],{}," folder so it replaces the previous version.\nThe framework will automatically react to the change, redo the OCR using PaddleOCR and update the output:",[3418,20417,20419],{"className":6347,"code":20418,"language":6349,"meta":23,"style":23},"{\"parsed_text\":[[\"\\n\\n# Shopping List \\n\\nMilk Bread Eggs \",{}]],\"diff\":1,\"time\":1770036557176}\n{\"parsed_text\":[[\"\\n\\n# Shopping List \\n\\nMilk Bread Eggs \",{}]],\"diff\":-1,\"time\":1770036569170}\n{\"parsed_text\":[[\"\\n\\n# Shopping List \\n\\nMilk Bread Eggs Cheese 
\",{}]],\"diff\":1,\"time\":1770036569170}\n",[3061,20420,20421,20448,20476],{"__ignoreMap":23},[1291,20422,20423,20425,20427,20429,20431,20433,20435,20437,20439,20441,20443,20445],{"class":3427,"line":3428},[1291,20424,8770],{"class":3435},[1291,20426,20356],{"class":6356},[1291,20428,4390],{"class":3812},[1291,20430,20361],{"class":3439},[1291,20432,20364],{"class":6356},[1291,20434,3566],{"class":6356},[1291,20436,20369],{"class":3439},[1291,20438,20372],{"class":6356},[1291,20440,20375],{"class":6356},[1291,20442,20378],{"class":6356},[1291,20444,20381],{"class":6356},[1291,20446,20447],{"class":6356},":1770036557176}\n",[1291,20449,20450,20452,20454,20456,20458,20460,20462,20464,20466,20468,20471,20473],{"class":3427,"line":24},[1291,20451,8770],{"class":3435},[1291,20453,20356],{"class":6356},[1291,20455,4390],{"class":3812},[1291,20457,20361],{"class":3439},[1291,20459,20364],{"class":6356},[1291,20461,3566],{"class":6356},[1291,20463,20369],{"class":3439},[1291,20465,20372],{"class":6356},[1291,20467,20375],{"class":6356},[1291,20469,20470],{"class":6356},":-1,",[1291,20472,20381],{"class":6356},[1291,20474,20475],{"class":6356},":1770036569170}\n",[1291,20477,20478,20480,20482,20484,20486,20489,20491,20493,20495,20497,20499,20501],{"class":3427,"line":675},[1291,20479,8770],{"class":3435},[1291,20481,20356],{"class":6356},[1291,20483,4390],{"class":3812},[1291,20485,20361],{"class":3439},[1291,20487,20488],{"class":6356},"\"\\n\\n# Shopping List \\n\\nMilk Bread Eggs Cheese \"",[1291,20490,3566],{"class":6356},[1291,20492,20369],{"class":3439},[1291,20494,20372],{"class":6356},[1291,20496,20375],{"class":6356},[1291,20498,20378],{"class":6356},[1291,20500,20381],{"class":6356},[1291,20502,20475],{"class":6356},[73,20504,20505,20506,20509],{},"You can see that the old value was removed (second line, with ",[3061,20507,20508],{},"\"diff\":-1",") and the new value (with the cheese) was added!",[73,20511,20512,20513,20516],{},"Note that for this to work, you 
need to be in streaming mode (",[3061,20514,20515],{},"mode=\"streaming\""," in the connector definition).",[140,20518,20520],{"id":20519},"integrating-paddleocr-into-a-rag-pipeline","Integrating PaddleOCR into a RAG Pipeline",[73,20522,20523,20524,20526,20527,20531,20532,20538],{},"You can use the ",[3061,20525,19619],{}," directly in our ",[77,20528,20530],{"href":8933,"rel":20529},[81],"Q&A RAG template",".\nYou simply need to update the ",[77,20533,20536],{"href":20534,"rel":20535},"https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fllm-app\u002Fblob\u002Fmain\u002Ftemplates\u002Fquestion_answering_rag\u002Fapp.yaml",[81],[3061,20537,7570],{}," configuration file and change the parser:",[3418,20540,20542],{"className":7720,"code":20541,"language":7722,"meta":23,"style":23},"$parser: !pw.xpacks.llm.parsers.PaddleOCRParser\n  concatenate_pages: True\n",[3061,20543,20544,20553],{"__ignoreMap":23},[1291,20545,20546,20548,20550],{"class":3427,"line":3428},[1291,20547,7974],{"class":3457},[1291,20549,4390],{"class":3435},[1291,20551,20552],{"class":7739}," !pw.xpacks.llm.parsers.PaddleOCRParser\n",[1291,20554,20555,20558,20560],{"class":3427,"line":24},[1291,20556,20557],{"class":3457},"  concatenate_pages",[1291,20559,4390],{"class":3435},[1291,20561,4853],{"class":7812},[73,20563,20564],{},"And that's it! 
Your RAG pipeline will now process documents in real-time using PaddleOCR.",[73,20566,19846,20567,694],{},[77,20568,20571],{"href":20569,"rel":20570},"https:\u002F\u002Fpathway.com\u002Fdevelopers\u002Ftemplates\u002Frag\u002Fdemo-question-answering",[81],"RAG template guide",[140,20573,5008],{"id":5007},[73,20575,20576],{},"Documents come in various formats, are often unstructured, and change frequently.\nTo handle this effectively, your data pipeline should adapt to the data's nature, not the other way around.",[73,20578,20579],{},"With PaddleOCR and Pathway Live Data Framework, you can process and use your data in real-time, regardless of format or volume.\nThis combination makes it easier to build responsive, scalable data processing pipelines.",[5019,20581,20582],{},"html pre.shiki code .s5Dmg, html code.shiki .s5Dmg{--shiki-default:#FFCB6B}html pre.shiki code .sfyAc, html code.shiki .sfyAc{--shiki-default:#C3E88D}html pre.shiki code .sAklC, html code.shiki .sAklC{--shiki-default:#89DDFF}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html pre.shiki code .s0W1g, html code.shiki .s0W1g{--shiki-default:#BABED8}html pre.shiki code .sx098, html code.shiki .sx098{--shiki-default:#F78C6C}html pre.shiki code .s6cf3, html code.shiki .s6cf3{--shiki-default:#89DDFF;--shiki-default-font-style:italic}html pre.shiki code .sdLwU, html code.shiki .sdLwU{--shiki-default:#82AAFF}html pre.shiki code .s-wAU, html code.shiki .s-wAU{--shiki-default:#F07178}html pre.shiki code .s7ZW3, html code.shiki .s7ZW3{--shiki-default:#BABED8;--shiki-default-font-style:italic}html 
pre.shiki code .sJ14y, html code.shiki .sJ14y{--shiki-default:#C792EA}html pre.shiki code .sbqyR, html code.shiki .sbqyR{--shiki-default:#FF9CAC}",{"title":23,"searchDepth":24,"depth":24,"links":20584},[20585,20586,20587,20591,20592,20598,20599],{"id":19551,"depth":24,"text":19552},{"id":19578,"depth":24,"text":19579},{"id":19594,"depth":24,"text":19595,"children":20588},[20589,20590],{"id":19612,"depth":675,"text":19613},{"id":19652,"depth":675,"text":19609},{"id":19695,"depth":24,"text":19696},{"id":19854,"depth":24,"text":19855,"children":20593},[20594,20595,20596,20597],{"id":19861,"depth":675,"text":19862},{"id":19900,"depth":675,"text":19901},{"id":20068,"depth":675,"text":20069},{"id":20113,"depth":675,"text":20114},{"id":20519,"depth":24,"text":20520},{"id":5007,"depth":24,"text":5008},{"single":35,"aside":34,"layout":90,"date":20601,"thumbnail":20602,"tags":20604},"2026-02-02",{"src":20603},"\u002Fassets\u002Fcontent\u002Fblog\u002Fthumbnails\u002Fpaddleocr-thumbnail.png",[90,17824,6268,20605],"framework","\u002Fframework\u002Fblog\u002Fpaddleocr",{"title":19522,"description":23},{"loc":20606},"framework\u002Fblog\u002F699.paddleocr","JE1_Jll_QcJDwTjSp31-NjBZviChAIW0_nFMeZNpClg",{"id":20612,"title":20613,"author":20614,"body":20615,"description":23,"extension":27,"meta":20852,"navigation":35,"path":20860,"seo":20861,"sitemap":20862,"stem":20863,"__hash__":20864},"content\u002Fframework\u002Fblog\u002F700.llm-yaml-templates.md","Build LLM\u002FRAG pipelines with YAML templates by Pathway Live Data Framework",{"id":7342,"url":7343,"name":7344,"description":7345,"img":7346,"provider":11,"linkedin":7347},{"type":13,"value":20616,"toc":20844},[20617,20620,20628,20632,20635,20655,20659,20662,20668,20672,20675,20786,20790,20793,20819,20823,20826,20828,20831,20837],[68,20618,20613],{"id":20619},"build-llmrag-pipelines-with-yaml-templates-by-pathway-live-data-framework",[73,20621,20622,20623,20627],{},"Exciting news! 
The Pathway Live Data Framework introduces a new feature that allows you to build ",[77,20624,20626],{"href":15745,"rel":20625},[81],"Large Language Model (LLM) Apps"," using YAML configuration files. This enables you to create production-ready RAG pipelines tailored to your needs—all without the hassle of writing Python code. Think of YAML templates as customizable templates with parameters that can be inherited by pipelines, making it easier for you to set up new projects or update multiple configurations.",[140,20629,20631],{"id":20630},"simplify-your-llm-pipeline-configuration","Simplify Your LLM Pipeline Configuration",[73,20633,20634],{},"The Pathway Live Data Framework allows you to put AI applications in production which offer high-accuracy RAG at scale using the most up-to-date knowledge available in your data sources. Configuring LLM pipelines traditionally involves altering Python scripts, which can be time-consuming and requires programming expertise within your team. With the new YAML configuration approach, you can:",[145,20636,20637,20643,20649],{},[148,20638,20639,20642],{},[169,20640,20641],{},"Easily adjust settings:"," Change parameters and settings directly in a human-readable YAML file.",[148,20644,20645,20648],{},[169,20646,20647],{},"Swap components effortlessly:"," Replace or modify components like data sources, LLM models, and indexers without code changes.",[148,20650,20651,20654],{},[169,20652,20653],{},"Keep configurations organized:"," Use variables and tags in YAML to maintain clean and manageable configuration files.",[140,20656,20658],{"id":20657},"learn-more","Learn More",[73,20660,20661],{},"To get started and see examples of how to customize your LLM templates with YAML, visit our detailed guide:",[73,20663,20664],{},[77,20665,20667],{"href":20666},"\u002Fdevelopers\u002Ftemplates\u002Fconfigure-yaml","Customizing LLM Templates with YAML Configuration 
Files",[140,20669,20671],{"id":20670},"customize-your-favorite-pipeline-with-yaml-templates","Customize Your Favorite Pipeline with YAML templates",[73,20673,20674],{},"Pick one of the application templates provided that suits you best. You can use it out of the box, or change some steps of the pipeline using YAML configuration files - for example, if you would like to add a new data source, or change a Vector Index into a Hybrid Index, it's just a one-line change.",[16104,20676,20677,20692],{},[16107,20678,20679],{},[16110,20680,20681,20689],{},[16113,20682,20683],{},[1291,20684,20688],{"className":20685},[20686,20687,2912],"md:w-[14rem]","w-4","Application (llm-app pipeline)",[16113,20690,20691],{},"Description",[16162,20693,20694,20707,20729,20742,20754,20768],{},[16110,20695,20696,20704],{},[16167,20697,20698],{},[77,20699,20701],{"href":8933,"rel":20700},[81],[3061,20702,20703],{},"Question-Answering RAG App",[16167,20705,20706],{},"Basic end-to-end RAG app. A question-answering pipeline that uses the GPT model of choice to provide answers to queries to your documents (PDF, DOCX,...) on a live connected data source (files, Google Drive, Sharepoint,...).",[16110,20708,20709,20718],{},[16167,20710,20711],{},[77,20712,20715],{"href":20713,"rel":20714},"https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fllm-app\u002Ftree\u002Fmain\u002Ftemplates\u002Fdocument_indexing",[81],[3061,20716,20717],{},"Live Document Indexing (Vector Store \u002F Retriever)",[16167,20719,20720,20721,5681,20724,20728],{},"A real-time document indexing pipeline for RAG that acts as a vector store service. It performs live indexing on your documents (PDF, DOCX,...) from a connected data source. 
It can be used with any frontend, or integrated as a retriever backend for a ",[77,20722,5090],{"href":20723},"\u002Fblog\u002Flangchain-integration",[77,20725,20727],{"href":20726},"\u002Fblog\u002Fllamaindex-pathway","Llamaindex"," application",[16110,20730,20731,20739],{},[16167,20732,20733],{},[77,20734,20736],{"href":8850,"rel":20735},[81],[3061,20737,20738],{},"Multimodal RAG pipeline",[16167,20740,20741],{},"Multimodal RAG using MLLM in the parsing stage to index PDFs. It is perfect for extracting information from unstructured financial documents in your folders (including charts and tables), updating results as documents change or new ones arrive.",[16110,20743,20744,20751],{},[16167,20745,20746],{},[77,20747,20749],{"href":8823,"rel":20748},[81],[3061,20750,8819],{},[16167,20752,20753],{},"Multi-modal search service that leverages GPT-4o for PowerPoint and PDF presentations. Content is parsed and enriched with metadata through a slide parsing module powered by GPT-4o, with parsed text and metadata stored in an in-memory indexing system that supports both vector and hybrid indexing.",[16110,20755,20756,20765],{},[16167,20757,20758],{},[77,20759,20762],{"href":20760,"rel":20761},"https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fllm-app\u002Ftree\u002Fmain\u002Ftemplates\u002Fadaptive_rag",[81],[3061,20763,20764],{},"Adaptive RAG App",[16167,20766,20767],{},"A RAG application using Adaptive RAG, a technique developed by Pathway to reduce token cost in RAG up to 4x while maintaining accuracy.",[16110,20769,20770,20779],{},[16167,20771,20772],{},[77,20773,20776],{"href":20774,"rel":20775},"https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fllm-app\u002Ftree\u002Fmain\u002Ftemplates\u002Fprivate_rag",[81],[3061,20777,20778],{},"Private RAG App with Mistral and Ollama",[16167,20780,20781,20782,20785],{},"A fully private (local) version of the ",[3061,20783,20784],{},"question_answering_rag"," RAG pipeline using Pathway Live Data Framework, Mistral, and 
Ollama.",[140,20787,20789],{"id":20788},"how-it-works","How It Works",[73,20791,20792],{},"The Pathway Live Data Framework uses a custom YAML parser to map your configurations to Python objects and functions. This means you can:",[145,20794,20795,20801,20807,20813],{},[148,20796,20797,20800],{},[169,20798,20799],{},"Initialize Objects:"," Instantiate classes directly within the YAML file by using map tags.",[148,20802,20803,20806],{},[169,20804,20805],{},"Reference Enums:"," Set values of enum arguments using YAML tags.",[148,20808,20809,20812],{},[169,20810,20811],{},"Define Schemas:"," Create data schemas inline using helper functions.",[148,20814,20815,20818],{},[169,20816,20817],{},"Use Variables:"," Declare variables for reuse throughout your configuration to keep it concise and maintainable.",[140,20820,20822],{"id":20821},"are-you-looking-to-build-an-enterprise-grade-rag-app","Are you looking to build an enterprise-grade RAG app?",[73,20824,20825],{},"Pathway is trusted by industry leaders such as NATO and Intel, and is natively available on both AWS and Azure Marketplaces. If you’d like to explore how Pathway Live Data Framework can support your RAG and Generative AI initiatives, we invite you to schedule a discovery session with our team.",[140,20827,5008],{"id":5007},[73,20829,20830],{},"Whether you're a seasoned developer or just starting out, the customizable LLM templates with YAML configuration enable you to build, customize, and deploy powerful LLM pipelines with ease. We can't wait to see what you'll create!",[73,20832,20833,20834,8876],{},"Join the Community and share your experiences by joining our community forum. 
Join the Pathway Discord server (",[77,20835,8875],{"href":3072,"rel":20836},[81],[73,20838,20839,20840,9175],{},"For any questions or feedback, don't hesitate to reach out to us at ",[77,20841,20843],{"href":20842},"mailto:contact@pathway.com","contact@pathway.com",{"title":23,"searchDepth":24,"depth":24,"links":20845},[20846,20847,20848,20849,20850,20851],{"id":20630,"depth":24,"text":20631},{"id":20657,"depth":24,"text":20658},{"id":20670,"depth":24,"text":20671},{"id":20788,"depth":24,"text":20789},{"id":20821,"depth":24,"text":20822},{"id":5007,"depth":24,"text":5008},{"layout":90,"date":20853,"tags":20854,"thumbnail":20855,"related":20857,"hidden":35},"2024-10-29",[94,6268],{"src":20856,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fllm-yaml-templates-th.png",[20858,20859],"\u002Fblog\u002Fpathway-showcased-during-intel-ai-summit","\u002Fblog\u002Fpathway-joins-the-opea-to-accelerate-generative-ai-adoption","\u002Fframework\u002Fblog\u002Fllm-yaml-templates",{"title":20613,"description":23},{"loc":20860},"framework\u002Fblog\u002F700.llm-yaml-templates","XPJGS0KpcF1-fj6KznxPrNIwS01utSew9_1YQumSojY",{"id":20866,"title":20867,"author":20868,"body":20869,"description":20913,"extension":27,"meta":20914,"navigation":35,"path":20920,"seo":20921,"sitemap":20922,"stem":20923,"__hash__":20924},"content\u002Fframework\u002Fblog\u002F701.azure-aci-deploy.md","Pathway Live Data Framework is Now Available on Microsoft Azure!",{"id":7342,"url":7343,"name":7344,"description":7345,"img":7346,"provider":11,"linkedin":7347},{"type":13,"value":20870,"toc":20911},[20871,20874,20877,20886,20889,20899,20902,20907],[68,20872,20867],{"id":20873},"pathway-live-data-framework-is-now-available-on-microsoft-azure",[73,20875,20876],{},"We are thrilled to announce that the Pathway Live Data Framework is now available on the Azure Marketplace!",[73,20878,20879,20880,20885],{},"The Pathway Live Data Framework BYOL (Bring Your Own License) 
",[77,20881,20884],{"href":20882,"rel":20883},"https:\u002F\u002Fazuremarketplace.microsoft.com\u002Fen-us\u002Fmarketplace\u002Fapps\u002Fnavalgo1695057418511.pathway-byol?tab=Overview",[81],"Container"," is now accessible within the Azure ecosystem. This offering provides a ready-to-use Docker image with Pathway Live Data Framework and all its dependencies pre-installed, making it easier than ever to deploy your data processing pipelines on Azure. You can start using the container right away. A free license key will help you unlock additional features of the framework.",[73,20887,20888],{},"Alternatively, you can deploy your pipeline code using Azure Container Instances and the framework's tools. This method allows you to run your applications in a managed Docker container without the need to manage virtual machines, offering more flexibility and control over your deployment.",[73,20890,20891,20894,20895,20898],{},[169,20892,20893],{},"Don't know where to start?"," Try our ",[77,20896,17824],{"href":20897},"\u002Fdevelopers\u002Fuser-guide\u002Fdeployment\u002Fazure-aci-deploy"," in building an ETL process that tracks GitHub commit history, removes sensitive data, and loads the results into a Delta Lake.",[73,20900,20901],{},"We are excited to bring the framework's powerful data processing capabilities to Azure users. Get started today and let us know your feedback in our Discord community!",[73,20903,20833,20904,8876],{},[77,20905,8875],{"href":3072,"rel":20906},[81],[73,20908,20839,20909,9175],{},[77,20910,20843],{"href":20842},{"title":23,"searchDepth":24,"depth":24,"links":20912},[],"Discover Pathway Live Data Framework on Azure Marketplace! Access the Pathway Live Data Framework BYOL container with pre-installed dependencies, ready for deployment in the Azure ecosystem. 
Unlock more features with a free license key and explore our tutorial on building an ETL process using Pathway Live Data Framework",{"layout":90,"date":20915,"tags":20916,"thumbnail":20917,"related":20919,"hidden":35},"2024-11-27",[94],{"src":20918,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fpathway-azure-marketplace-th.png",[20858,20859],"\u002Fframework\u002Fblog\u002Fazure-aci-deploy",{"title":20867,"description":20913},{"loc":20920},"framework\u002Fblog\u002F701.azure-aci-deploy","R-Y1vge83eBBdzbywCsA_98zv9YbU-wzccAjFPcl_UE",{"id":20926,"title":20927,"author":20928,"body":20929,"description":22704,"extension":27,"meta":22705,"navigation":35,"path":22712,"seo":22713,"sitemap":22714,"stem":22715,"__hash__":22716},"content\u002Fframework\u002Fblog\u002F878.gemini2-document-ingestion-and-analytics.md","Gemini 2.0 for Document Ingestion and Analytics with Pathway",{"name":7344,"description":7345,"img":7346,"provider":11,"linkedin":7347},{"type":13,"value":20930,"toc":22675},[20931,20934,20937,20949,20960,20964,20968,20982,20986,21004,21008,21016,21020,21034,21038,21052,21056,21068,21078,21102,21113,21119,21123,21127,21134,21149,21152,21163,21167,21169,21182,21188,21205,21211,21215,21223,21226,21807,21811,21858,21861,21865,21873,21877,21881,21905,21908,21915,21922,21944,21946,21950,21952,21954,21968,21970,21980,21982,21985,21990,22011,22016,22019,22094,22097,22101,22110,22112,22115,22174,22177,22245,22247,22249,22252,22327,22330,22356,22359,22362,22366,22373,22379,22386,22389,22396,22399,22406,22409,22413,22421,22533,22538,22590,22596,22600,22603,22606,22610,22624,22631,22633,22637,22657,22660,22662,22664,22666,22672],[68,20932,20927],{"id":20933},"gemini-20-for-document-ingestion-and-analytics-with-pathway",[73,20935,20936],{},"Most LLM showcases emphasize question-answering on existing data. 
Yet, ingesting raw documents is often a bigger challenge—especially for slides or PDFs with both text and visuals.",[73,20938,20939,20940,20943,20944,20948],{},"With ",[169,20941,20942],{},"Gemini 2.0",", you can convert streaming slides or PDFs into structured, query-ready data. Its multimodal capabilities handle OCR-style extraction, while Pathway Live Data Framework handles subsequent chunking, indexing, and retrieval to power real-time analytics. This approach leverages an existing open-source AI pipeline—",[77,20945,20947],{"href":8850,"rel":20946},[81],"Multimodal RAG with Pathway","—which showcases how to parse and index unstructured financial documents, including tables and images, using a vision language model.",[73,20950,20951,20952,20955,20956,20959],{},"In this article, you'll learn how to plug Gemini 2.0 into a ",[169,20953,20954],{},"real-time RAG pipeline built with Pathway Live Data Framework",", enabling you to power ",[169,20957,20958],{},"accurate, context-aware decisions"," on constantly changing data. This setup allows you to seamlessly integrate a \"write path\" (ingestion) and \"read path\" (query), simplifying your entire data flow with the framework.",[140,20961,20963],{"id":20962},"benefits-of-using-pathway-live-data-framework-for-document-ingestion-and-analytics-with-gemini-20","Benefits of Using Pathway Live Data Framework for Document Ingestion and Analytics with Gemini 2.0",[3189,20965,20967],{"id":20966},"_1-unified-pipeline-write-path-read-path","1. Unified Pipeline (Write Path + Read Path)",[145,20969,20970,20976],{},[148,20971,20972,20975],{},[169,20973,20974],{},"Write Path",": Acquire and prepare knowledge at ingestion time (parse PDFs, PPTX, etc.). Break them into meaningful chunks, embed them, and index them immediately.",[148,20977,20978,20981],{},[169,20979,20980],{},"Read Path",": At query time, retrieve the most relevant chunks and let the LLM compose a final answer. 
Pathway orchestrates both steps in a single solution.",[3189,20983,20985],{"id":20984},"_2-seamless-multimodal-parsing","2. Seamless Multimodal Parsing",[145,20987,20988,20998],{},[148,20989,20990,20993,20994,20997],{},[169,20991,20992],{},"Vision-Based Approach",": The framework's ",[3061,20995,20996],{},"SlideParser"," converts each slide or PDF page into an image, letting the LLM analyze text, tables, charts, and other visual elements in a single step.",[148,20999,21000,21003],{},[169,21001,21002],{},"Simplified Architecture",": No need for separate OCR or specialized table-parsing modules—it's all handled in the same pipeline.",[3189,21005,21007],{"id":21006},"_3-live-indexing","3. Live Indexing",[145,21009,21010],{},[148,21011,21012,21015],{},[169,21013,21014],{},"Continuous Updates",": As new documents arrive, the framework automatically updates the vector store, ensuring your RAG system always reflects the most current data.",[3189,21017,21019],{"id":21018},"_4-scalability-and-flexibility","4. Scalability and Flexibility",[145,21021,21022,21028],{},[148,21023,21024,21027],{},[169,21025,21026],{},"Streaming Architecture",": Pathway Live Data Framework orchestrates concurrency, error handling, and transformations, even with high-volume or constantly updating documents.",[148,21029,21030,21033],{},[169,21031,21032],{},"Customizable Templates",": The entire pipeline is based on an existing YAML-defined template that you can adapt to use Gemini. You can configure data sources, switch from a vector to a hybrid index, or tweak other steps with just a one-line change. This makes it easier to set up new projects or update multiple configurations at once.",[3189,21035,21037],{"id":21036},"_5-minimal-code-overhead","5. 
Minimal Code Overhead",[145,21039,21040,21046],{},[148,21041,21042,21045],{},[169,21043,21044],{},"Concise Integration",": A few lines of code handle ingestion, parsing, embedding, and serve a retrieval-augmented generation (RAG) endpoint.",[148,21047,21048,21051],{},[169,21049,21050],{},"Focus on Accuracy",": By reducing boilerplate, you can concentrate on improving knowledge retrieval instead of juggling multiple services or complex plumbing.",[140,21053,21055],{"id":21054},"how-to-use-pathway-live-data-framework-gemini-20-for-ocr-sample-code-walkthrough","How to Use Pathway Live Data Framework & Gemini 2.0 for OCR: Sample Code Walkthrough",[73,21057,21058,21059,21067],{},"Below is an example adapted from the ",[77,21060,21063,21064,21066],{"href":21061,"rel":21062},"https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fllm-app\u002Ftree\u002Fmain\u002Ftemplates\u002F4o_multimodal_rag",[81],"Pathway's ",[3061,21065,3203],{}," pipeline",", showing how you can parse PPTX\u002FPDF files, generate textual chunks, embed them and store them for retrieval in a single, integrated pipeline.",[7500,21069,21070],{},[73,21071,21072,21074,21075,21077],{},[169,21073,7506],{},": SlideParser requires a Pathway license key. If you haven’t already, ",[77,21076,7512],{"href":7548}," to unlock SlideParser and other enterprise features. 
The application will be updated with this key in Step 5.",[145,21079,21080,21085,21090],{},[148,21081,21082,21084],{},[169,21083,1279],{},": Synchronizes and indexes data in real-time, orchestrating concurrency and streaming.",[148,21086,21087,21089],{},[169,21088,20996],{},": Converts PPTX\u002FPDF files to images and uses a vision-capable LLM for parsing.",[148,21091,21092,21094,21095,3126,21098,21101],{},[169,21093,20942],{},": Accessed through ",[3061,21096,21097],{},"litellm",[3061,21099,21100],{},"google.generativeai"," for OCR-like extraction and chunking.",[73,21103,21104,21105,21108,21109,21112],{},"The Pathway Live Data Framework acts as an ",[169,21106,21107],{},"end-to-end RAG orchestrator",", wrapping data ingestion, streaming, and real-time indexing into one ",[169,21110,21111],{},"containerized"," pipeline that scales effortlessly from a laptop to enterprise deployments.",[3189,21114,21116],{"id":21115},"architecture-diagram",[169,21117,21118],{},"Architecture diagram",[1141,21120],{":zoomable":1143,"alt":21118,"className":21121,"src":21122},[133],"\u002Fassets\u002Fcontent\u002Fblog\u002Fgemini-diagram.svg",[3189,21124,21126],{"id":21125},"step-1-clone-the-llm-app-templates-repository","Step 1: Clone the LLM App Templates Repository",[73,21128,21129,21130,21133],{},"Clone the ",[3061,21131,21132],{},"llm-app"," repository from GitHub. 
This repository contains all the files you need.",[3418,21135,21136],{"className":6347,"code":7630,"language":6349,"meta":23,"style":23},[3061,21137,21138],{"__ignoreMap":23},[1291,21139,21140,21143,21146],{"class":3427,"line":3428},[1291,21141,21142],{"class":6356},"git",[1291,21144,21145],{"class":3439}," clone",[1291,21147,21148],{"class":3439}," https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fllm-app.git\n",[73,21150,21151],{},"If you receive an error because an older version of the repository exists, navigate to the correct directory and update it using:",[3418,21153,21154],{"className":6347,"code":7643,"language":6349,"meta":23,"style":23},[3061,21155,21156],{"__ignoreMap":23},[1291,21157,21158,21160],{"class":3427,"line":3428},[1291,21159,21142],{"class":6356},[1291,21161,21162],{"class":3439}," pull\n",[3189,21164,21166],{"id":21165},"step-2-navigate-to-the-multimodal-rag-project-directory","Step 2: Navigate to the Multimodal RAG Project Directory",[73,21168,7656],{},[3418,21170,21172],{"className":6347,"code":21171,"language":6349,"meta":23,"style":23},"cd templates\u002Fmultimodal_rag\n",[3061,21173,21174],{"__ignoreMap":23},[1291,21175,21176,21179],{"class":3427,"line":3428},[1291,21177,21178],{"class":3812},"cd",[1291,21180,21181],{"class":3439}," templates\u002Fmultimodal_rag\n",[3189,21183,21185,21186],{"id":21184},"step-3-modify-dockerfile","Step 3: Modify ",[3061,21187,7606],{},[73,21189,21190,21191,14364,21194,21197,21198,3126,21201,21204],{},"Below is an updated Dockerfile that replaces the default dependencies (",[3061,21192,21193],{},"python3-opencv",[3061,21195,21196],{},"tesseract-ocr",", etc.) 
with just ",[3061,21199,21200],{},"poppler-utils",[3061,21202,21203],{},"libreoffice",", reducing the container footprint while still covering multimodal parsing needs.",[3418,21206,21209],{"className":21207,"code":21208,"language":4999,"meta":23},[4997],"FROM pathwaycom\u002Fpathway:latest\n\nWORKDIR \u002Fapp\n\nRUN apt-get update && apt-get install -y \\\n    poppler-utils \\\n    libreoffice \\\n    && rm -rf \u002Fvar\u002Flib\u002Fapt\u002Flists\u002F* \u002Fvar\u002Fcache\u002Fapt\u002Farchives\u002F*\n\nCOPY . .\n\nCMD [\"python\", \"app.py\"]\n",[3061,21210,21208],{"__ignoreMap":23},[3189,21212,7695,21213,7698],{"id":7694},[3061,21214,7570],{},[73,21216,21217,21218,21220,21221,694],{},"In the default YAML configuration, the pipeline uses GPT-3.5 for language tasks and a generic ",[3061,21219,4531],{}," for document parsing. The snippet below replaces these defaults to integrate Gemini 2.0 for OCR-like parsing, updates the prompt to better handle slide images, and switches the embedder to ",[3061,21222,4268],{},[73,21224,21225],{},"By default, documents are read from a local data folder (see $sources in the YAML). If files need to be pulled from other sources—such as SharePoint, Google Drive, or S3—Pathway Live Data Framework allows seamless connector switching by adding or replacing the relevant I\u002FO block.",[3418,21227,21229],{"className":7720,"code":21228,"filename":7570,"language":7722,"meta":23,"style":23},"$sources:\n  - !pw.io.fs.read\n    path: data\n    format: binary\n    with_metadata: true\n\n$parser_llm: !pw.xpacks.llm.llms.LiteLLMChat\n  model: \"gemini\u002Fgemini-2.0-flash\"\n  retry_strategy: !pw.udfs.ExponentialBackoffRetryStrategy\n    max_retries: 2\n  cache_strategy: !pw.udfs.DefaultCache {}\n  temperature: 0\n\n$parse_prompt: |\n  Apply OCR to following page and respond in markdown. \n  Tables should be formatted as markdown tables. 
Make sure to include table information such as title in a readable format.\n  Spell out all the text that is on the page.\n\n$embedder: !pw.xpacks.llm.embedders.GeminiEmbedder\n  model: \"models\u002Fembedding-001\"\n  cache_strategy: !pw.udfs.DefaultCache {}\n  retry_strategy: !pw.udfs.ExponentialBackoffRetryStrategy\n    max_retries: 3\n\n$splitter: !pw.xpacks.llm.splitters.TokenCountSplitter\n  min_tokens: 200\n  max_tokens: 750\n\n$parser: !pw.xpacks.llm.parsers.SlideParser\n  llm: $parser_llm\n  parse_prompt: $parse_prompt\n  image_size: !!python\u002Ftuple [800, 1200]\n  cache_strategy: !pw.udfs.DefaultCache {}\n\n$knn_index: !pw.stdlib.indexing.BruteForceKnnFactory\n  reserved_space: 1000\n  embedder: $embedder\n  metric: !pw.engine.BruteForceKnnMetricKind.COS\n\n$bm25_index: !pw.stdlib.indexing.TantivyBM25Factory\n\n$retriever_factory: !pw.stdlib.indexing.HybridIndexFactory\n  retriever_factories:\n    - $knn_index\n    - $bm25_index\n  \n$document_store: !pw.xpacks.llm.document_store.DocumentStore\n  docs: $sources\n  parser: $parser\n  splitter: $splitter\n  retriever_factory: $retriever_factory\n\n$llm: !pw.xpacks.llm.llms.OpenAIChat\n  model: \"gpt-4o\"\n  retry_strategy: !pw.udfs.ExponentialBackoffRetryStrategy\n    max_retries: 2\n  cache_strategy: !pw.udfs.DefaultCache {}\n  temperature: 0\n  verbose: true\n\n$prompt_template: |\n  You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. 
If you don't know the answer, just say that you don't know.\n  Question: {query} \n\n  Context: {context}\n\n  Answer:\n\nquestion_answerer: !pw.xpacks.llm.question_answering.BaseRAGQuestionAnswerer\n  llm: $llm\n  indexer: $document_store\n  prompt_template: $prompt_template\n  # Optionally, you can adjust the number of documents included in the context\n  # search_topk: 6\n",[3061,21230,21231,21237,21244,21253,21262,21270,21274,21284,21297,21305,21314,21324,21332,21336,21346,21351,21356,21361,21365,21374,21386,21396,21404,21413,21417,21425,21435,21444,21448,21457,21466,21476,21498,21508,21512,21521,21529,21537,21546,21550,21560,21564,21573,21580,21588,21595,21599,21607,21615,21623,21631,21639,21643,21651,21663,21671,21679,21689,21697,21706,21710,21719,21725,21731,21736,21742,21747,21753,21758,21767,21776,21785,21796,21802],{"__ignoreMap":23},[1291,21232,21233,21235],{"class":3427,"line":3428},[1291,21234,7729],{"class":3457},[1291,21236,5243],{"class":3435},[1291,21238,21239,21241],{"class":3427,"line":24},[1291,21240,7736],{"class":3435},[1291,21242,21243],{"class":7739}," !pw.io.fs.read\n",[1291,21245,21246,21248,21250],{"class":3427,"line":675},[1291,21247,4095],{"class":3457},[1291,21249,4390],{"class":3435},[1291,21251,21252],{"class":3439}," data\n",[1291,21254,21255,21257,21259],{"class":3427,"line":3542},[1291,21256,4112],{"class":3457},[1291,21258,4390],{"class":3435},[1291,21260,21261],{"class":3439}," binary\n",[1291,21263,21264,21266,21268],{"class":3427,"line":3547},[1291,21265,4128],{"class":3457},[1291,21267,4390],{"class":3435},[1291,21269,7813],{"class":7812},[1291,21271,21272],{"class":3427,"line":3572},[1291,21273,3526],{"emptyLinePlaceholder":35},[1291,21275,21276,21279,21281],{"class":3427,"line":3614},[1291,21277,21278],{"class":3457},"$parser_llm",[1291,21280,4390],{"class":3435},[1291,21282,21283],{"class":7739}," 
!pw.xpacks.llm.llms.LiteLLMChat\n",[1291,21285,21286,21288,21290,21292,21295],{"class":3427,"line":3640},[1291,21287,7842],{"class":3457},[1291,21289,4390],{"class":3435},[1291,21291,3705],{"class":3435},[1291,21293,21294],{"class":3439},"gemini\u002Fgemini-2.0-flash",[1291,21296,3746],{"class":3435},[1291,21298,21299,21301,21303],{"class":3427,"line":3665},[1291,21300,7856],{"class":3457},[1291,21302,4390],{"class":3435},[1291,21304,7861],{"class":7739},[1291,21306,21307,21309,21311],{"class":3427,"line":3670},[1291,21308,7866],{"class":3457},[1291,21310,4390],{"class":3435},[1291,21312,21313],{"class":3451}," 2\n",[1291,21315,21316,21318,21320,21322],{"class":3427,"line":3677},[1291,21317,7876],{"class":3457},[1291,21319,4390],{"class":3435},[1291,21321,7881],{"class":7739},[1291,21323,7884],{"class":3435},[1291,21325,21326,21328,21330],{"class":3427,"line":3877},[1291,21327,7889],{"class":3457},[1291,21329,4390],{"class":3435},[1291,21331,7894],{"class":3451},[1291,21333,21334],{"class":3427,"line":3916},[1291,21335,3526],{"emptyLinePlaceholder":35},[1291,21337,21338,21341,21343],{"class":3427,"line":4519},[1291,21339,21340],{"class":3457},"$parse_prompt",[1291,21342,4390],{"class":3435},[1291,21344,21345],{"class":3475}," |\n",[1291,21347,21348],{"class":3427,"line":6038},[1291,21349,21350],{"class":3439},"  Apply OCR to following page and respond in markdown. \n",[1291,21352,21353],{"class":3427,"line":6043},[1291,21354,21355],{"class":3439},"  Tables should be formatted as markdown tables. 
Make sure to include table information such as title in a readable format.\n",[1291,21357,21358],{"class":3427,"line":6066},[1291,21359,21360],{"class":3439},"  Spell out all the text that is on the page.\n",[1291,21362,21363],{"class":3427,"line":6078},[1291,21364,3526],{"emptyLinePlaceholder":35},[1291,21366,21367,21369,21371],{"class":3427,"line":6089},[1291,21368,7913],{"class":3457},[1291,21370,4390],{"class":3435},[1291,21372,21373],{"class":7739}," !pw.xpacks.llm.embedders.GeminiEmbedder\n",[1291,21375,21376,21378,21380,21382,21384],{"class":3427,"line":6124},[1291,21377,7842],{"class":3457},[1291,21379,4390],{"class":3435},[1291,21381,3705],{"class":3435},[1291,21383,4313],{"class":3439},[1291,21385,3746],{"class":3435},[1291,21387,21388,21390,21392,21394],{"class":3427,"line":6133},[1291,21389,7876],{"class":3457},[1291,21391,4390],{"class":3435},[1291,21393,7881],{"class":7739},[1291,21395,7884],{"class":3435},[1291,21397,21398,21400,21402],{"class":3427,"line":6141},[1291,21399,7856],{"class":3457},[1291,21401,4390],{"class":3435},[1291,21403,7861],{"class":7739},[1291,21405,21406,21408,21410],{"class":3427,"line":6151},[1291,21407,7866],{"class":3457},[1291,21409,4390],{"class":3435},[1291,21411,21412],{"class":3451}," 3\n",[1291,21414,21415],{"class":3427,"line":6923},[1291,21416,3526],{"emptyLinePlaceholder":35},[1291,21418,21419,21421,21423],{"class":3427,"line":6928},[1291,21420,7950],{"class":3457},[1291,21422,4390],{"class":3435},[1291,21424,7955],{"class":7739},[1291,21426,21427,21430,21432],{"class":3427,"line":6934},[1291,21428,21429],{"class":3457},"  min_tokens",[1291,21431,4390],{"class":3435},[1291,21433,21434],{"class":3451}," 200\n",[1291,21436,21437,21439,21441],{"class":3427,"line":6940},[1291,21438,7960],{"class":3457},[1291,21440,4390],{"class":3435},[1291,21442,21443],{"class":3451}," 
750\n",[1291,21445,21446],{"class":3427,"line":6952},[1291,21447,3526],{"emptyLinePlaceholder":35},[1291,21449,21450,21452,21454],{"class":3427,"line":6984},[1291,21451,7974],{"class":3457},[1291,21453,4390],{"class":3435},[1291,21455,21456],{"class":7739}," !pw.xpacks.llm.parsers.SlideParser\n",[1291,21458,21459,21461,21463],{"class":3427,"line":7996},[1291,21460,8131],{"class":3457},[1291,21462,4390],{"class":3435},[1291,21464,21465],{"class":3439}," $parser_llm\n",[1291,21467,21468,21471,21473],{"class":3427,"line":8007},[1291,21469,21470],{"class":3457},"  parse_prompt",[1291,21472,4390],{"class":3435},[1291,21474,21475],{"class":3439}," $parse_prompt\n",[1291,21477,21478,21481,21483,21486,21488,21491,21493,21496],{"class":3427,"line":8018},[1291,21479,21480],{"class":3457},"  image_size",[1291,21482,4390],{"class":3435},[1291,21484,21485],{"class":7739}," !!python\u002Ftuple",[1291,21487,4145],{"class":3435},[1291,21489,21490],{"class":3451},"800",[1291,21492,3566],{"class":3435},[1291,21494,21495],{"class":3451}," 
1200",[1291,21497,5267],{"class":3435},[1291,21499,21500,21502,21504,21506],{"class":3427,"line":8029},[1291,21501,7876],{"class":3457},[1291,21503,4390],{"class":3435},[1291,21505,7881],{"class":7739},[1291,21507,7884],{"class":3435},[1291,21509,21510],{"class":3427,"line":8040},[1291,21511,3526],{"emptyLinePlaceholder":35},[1291,21513,21514,21517,21519],{"class":3427,"line":8051},[1291,21515,21516],{"class":3457},"$knn_index",[1291,21518,4390],{"class":3435},[1291,21520,8004],{"class":7739},[1291,21522,21523,21525,21527],{"class":3427,"line":8057},[1291,21524,8010],{"class":3457},[1291,21526,4390],{"class":3435},[1291,21528,8015],{"class":3451},[1291,21530,21531,21533,21535],{"class":3427,"line":8068},[1291,21532,8021],{"class":3457},[1291,21534,4390],{"class":3435},[1291,21536,8026],{"class":3439},[1291,21538,21539,21541,21543],{"class":3427,"line":8079},[1291,21540,8032],{"class":3457},[1291,21542,4390],{"class":3435},[1291,21544,21545],{"class":7739}," !pw.engine.BruteForceKnnMetricKind.COS\n",[1291,21547,21548],{"class":3427,"line":8090},[1291,21549,3526],{"emptyLinePlaceholder":35},[1291,21551,21552,21555,21557],{"class":3427,"line":8101},[1291,21553,21554],{"class":3457},"$bm25_index",[1291,21556,4390],{"class":3435},[1291,21558,21559],{"class":7739}," !pw.stdlib.indexing.TantivyBM25Factory\n",[1291,21561,21562],{"class":3427,"line":8112},[1291,21563,3526],{"emptyLinePlaceholder":35},[1291,21565,21566,21568,21570],{"class":3427,"line":8117},[1291,21567,7999],{"class":3457},[1291,21569,4390],{"class":3435},[1291,21571,21572],{"class":7739}," !pw.stdlib.indexing.HybridIndexFactory\n",[1291,21574,21575,21578],{"class":3427,"line":8128},[1291,21576,21577],{"class":3457},"  retriever_factories",[1291,21579,5243],{"class":3435},[1291,21581,21582,21585],{"class":3427,"line":8139},[1291,21583,21584],{"class":3435},"    -",[1291,21586,21587],{"class":3439}," 
$knn_index\n",[1291,21589,21590,21592],{"class":3427,"line":8150},[1291,21591,21584],{"class":3435},[1291,21593,21594],{"class":3439}," $bm25_index\n",[1291,21596,21597],{"class":3427,"line":8156},[1291,21598,8054],{"class":3431},[1291,21600,21601,21603,21605],{"class":3427,"line":8162},[1291,21602,8060],{"class":3457},[1291,21604,4390],{"class":3435},[1291,21606,8065],{"class":7739},[1291,21608,21609,21611,21613],{"class":3427,"line":8168},[1291,21610,8071],{"class":3457},[1291,21612,4390],{"class":3435},[1291,21614,8076],{"class":3439},[1291,21616,21617,21619,21621],{"class":3427,"line":8174},[1291,21618,8082],{"class":3457},[1291,21620,4390],{"class":3435},[1291,21622,8087],{"class":3439},[1291,21624,21625,21627,21629],{"class":3427,"line":8180},[1291,21626,8093],{"class":3457},[1291,21628,4390],{"class":3435},[1291,21630,8098],{"class":3439},[1291,21632,21633,21635,21637],{"class":3427,"line":8186},[1291,21634,8104],{"class":3457},[1291,21636,4390],{"class":3435},[1291,21638,8109],{"class":3439},[1291,21640,21641],{"class":3427,"line":8191},[1291,21642,3526],{"emptyLinePlaceholder":35},[1291,21644,21645,21647,21649],{"class":3427,"line":8197},[1291,21646,7832],{"class":3457},[1291,21648,4390],{"class":3435},[1291,21650,7837],{"class":7739},[1291,21652,21653,21655,21657,21659,21661],{"class":3427,"line":8203},[1291,21654,7842],{"class":3457},[1291,21656,4390],{"class":3435},[1291,21658,3705],{"class":3435},[1291,21660,7849],{"class":3439},[1291,21662,3746],{"class":3435},[1291,21664,21665,21667,21669],{"class":3427,"line":8209},[1291,21666,7856],{"class":3457},[1291,21668,4390],{"class":3435},[1291,21670,7861],{"class":7739},[1291,21672,21673,21675,21677],{"class":3427,"line":8214},[1291,21674,7866],{"class":3457},[1291,21676,4390],{"class":3435},[1291,21678,21313],{"class":3451},[1291,21680,21681,21683,21685,21687],{"class":3427,"line":8220},[1291,21682,7876],{"class":3457},[1291,21684,4390],{"class":3435},[1291,21686,7881],{"class":7739},[1291,21688,7884],{"cla
ss":3435},[1291,21690,21691,21693,21695],{"class":3427,"line":8226},[1291,21692,7889],{"class":3457},[1291,21694,4390],{"class":3435},[1291,21696,7894],{"class":3451},[1291,21698,21699,21702,21704],{"class":3427,"line":8231},[1291,21700,21701],{"class":3457},"  verbose",[1291,21703,4390],{"class":3435},[1291,21705,7813],{"class":7812},[1291,21707,21708],{"class":3427,"line":8237},[1291,21709,3526],{"emptyLinePlaceholder":35},[1291,21711,21712,21715,21717],{"class":3427,"line":8243},[1291,21713,21714],{"class":3457},"$prompt_template",[1291,21716,4390],{"class":3435},[1291,21718,21345],{"class":3475},[1291,21720,21722],{"class":3427,"line":21721},62,[1291,21723,21724],{"class":3439},"  You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.\n",[1291,21726,21728],{"class":3427,"line":21727},63,[1291,21729,21730],{"class":3439},"  Question: {query} \n",[1291,21732,21734],{"class":3427,"line":21733},64,[1291,21735,3526],{"emptyLinePlaceholder":35},[1291,21737,21739],{"class":3427,"line":21738},65,[1291,21740,21741],{"class":3439},"  Context: {context}\n",[1291,21743,21745],{"class":3427,"line":21744},66,[1291,21746,3526],{"emptyLinePlaceholder":35},[1291,21748,21750],{"class":3427,"line":21749},67,[1291,21751,21752],{"class":3439},"  
Answer:\n",[1291,21754,21756],{"class":3427,"line":21755},68,[1291,21757,3526],{"emptyLinePlaceholder":35},[1291,21759,21761,21763,21765],{"class":3427,"line":21760},69,[1291,21762,8120],{"class":3457},[1291,21764,4390],{"class":3435},[1291,21766,8125],{"class":7739},[1291,21768,21770,21772,21774],{"class":3427,"line":21769},70,[1291,21771,8131],{"class":3457},[1291,21773,4390],{"class":3435},[1291,21775,8136],{"class":3439},[1291,21777,21779,21781,21783],{"class":3427,"line":21778},71,[1291,21780,8142],{"class":3457},[1291,21782,4390],{"class":3435},[1291,21784,8147],{"class":3439},[1291,21786,21788,21791,21793],{"class":3427,"line":21787},72,[1291,21789,21790],{"class":3457},"  prompt_template",[1291,21792,4390],{"class":3435},[1291,21794,21795],{"class":3439}," $prompt_template\n",[1291,21797,21799],{"class":3427,"line":21798},73,[1291,21800,21801],{"class":3673},"  # Optionally, you can adjust the number of documents included in the context\n",[1291,21803,21805],{"class":3427,"line":21804},74,[1291,21806,8159],{"class":3673},[3206,21808,21810],{"id":21809},"key-updates","Key Updates",[145,21812,21813,21829,21838,21852],{},[148,21814,21815,21818,21819,21821,21822,21824,21825,21828],{},[169,21816,21817],{},"Parser & Prompt",": Switch from the default ",[3061,21820,4531],{}," to ",[3061,21823,20996],{}," for OCR-like handling of PPTX\u002FPDF pages, along with a new ",[3061,21826,21827],{},"parse_prompt"," that ensures table data is captured.",[148,21830,21831,21834,21835,21837],{},[169,21832,21833],{},"Embedder",": Use ",[3061,21836,4268],{}," instead of an OpenAI-based embedder to integrate Gemini 2.0 capabilities.",[148,21839,21840,21843,21844,21846,21847,21849,21850,694],{},[169,21841,21842],{},"LLM Choice",": A new ",[3061,21845,4188],{}," instance references ",[3061,21848,21294],{}," for OCR tasks, while the QA step continues using ",[3061,21851,7849],{},[148,21853,21854,21857],{},[169,21855,21856],{},"Removed Unused Services",": Comments for SharePoint or 
GDrive imports are removed for clarity, leaving a minimal setup focused on local files.",[73,21859,21860],{},"This configuration ensures your pipeline is optimized for vision-based parsing using Gemini 2.0 in tandem with the framework’s real-time indexing and retrieval.",[3189,21862,21864],{"id":21863},"step-5-obtain-and-update-the-pathway-live-data-framework-license-key-for-slideparser","Step 5: Obtain and Update the Pathway Live Data Framework License Key for SlideParser",[73,21866,8460,21867,8464,21870,21872],{},[169,21868,21869],{},"advanced features like SlideParser",[169,21871,8467],{},". This key unlocks additional enterprise-grade capabilities such as enhanced RAM limits, enterprise connectors (e.g., SharePoint, Delta Table, Iceberg), full persistence, and monitoring.",[73,21874,8471,21875,8476],{},[77,21876,8475],{"href":7548},[73,21878,8479,21879,8482],{},[3061,21880,7585],{},[3418,21882,21884],{"className":3420,"code":21883,"language":3422,"meta":23,"style":23},"pw.set_license_key(\"your-license-key-here\")\n",[3061,21885,21886],{"__ignoreMap":23},[1291,21887,21888,21890,21892,21894,21896,21898,21901,21903],{"class":3427,"line":3428},[1291,21889,3841],{"class":3431},[1291,21891,694],{"class":3435},[1291,21893,3846],{"class":3812},[1291,21895,3816],{"class":3435},[1291,21897,3691],{"class":3435},[1291,21899,21900],{"class":3439},"your-license-key-here",[1291,21902,3691],{"class":3435},[1291,21904,3827],{"class":3435},[73,21906,21907],{},"This ensures that SlideParser and other advanced features are enabled in your application.",[3189,21909,21911,21912,21914],{"id":21910},"step-6-update-the-env-file-with-your-api-keys","Step 6: Update the ",[3061,21913,7612],{}," File with Your API Keys",[73,21916,7675,21917,7679,21919,21921],{},[3061,21918,7678],{},[3061,21920,7612],{}," and update it with your keys:",[3418,21923,21927],{"className":21924,"code":21925,"filename":7612,"language":21926,"meta":23,"style":23},"language-dotenv shiki shiki-themes 
material-theme-palenight","GEMINI_API_KEY=\nGOOGLE_API_KEY=\nOPENAI_API_KEY=\n","dotenv",[3061,21928,21929,21934,21939],{"__ignoreMap":23},[1291,21930,21931],{"class":3427,"line":3428},[1291,21932,21933],{},"GEMINI_API_KEY=\n",[1291,21935,21936],{"class":3427,"line":24},[1291,21937,21938],{},"GOOGLE_API_KEY=\n",[1291,21940,21941],{"class":3427,"line":675},[1291,21942,21943],{},"OPENAI_API_KEY=\n",[73,21945,7691],{},[3189,21947,21949],{"id":21948},"step-7-running-the-project","Step 7: Running the Project",[3206,21951,8518],{"id":8517},[73,21953,8521],{},[3418,21955,21956],{"className":6347,"code":8524,"language":6349,"meta":23,"style":23},[3061,21957,21958],{"__ignoreMap":23},[1291,21959,21960,21962,21964,21966],{"class":3427,"line":3428},[1291,21961,6357],{"class":6356},[1291,21963,6360],{"class":3439},[1291,21965,8535],{"class":3439},[1291,21967,8538],{"class":3439},[73,21969,8541],{},[3418,21971,21972],{"className":6347,"code":8544,"language":6349,"meta":23,"style":23},[3061,21973,21974],{"__ignoreMap":23},[1291,21975,21976,21978],{"class":3427,"line":3428},[1291,21977,3422],{"class":6356},[1291,21979,8553],{"class":3439},[3206,21981,8557],{"id":8556},[73,21983,21984],{},"Build and run the Docker image. 
Note that this step might take a few minutes.",[21986,21987,21989],"h5",{"id":21988},"build-the-docker-image","Build the Docker Image:",[3418,21991,21993],{"className":6347,"code":21992,"language":6349,"meta":23,"style":23},"docker build -t rag .\n",[3061,21994,21995],{"__ignoreMap":23},[1291,21996,21997,21999,22002,22005,22008],{"class":3427,"line":3428},[1291,21998,7331],{"class":6356},[1291,22000,22001],{"class":3439}," build",[1291,22003,22004],{"class":3439}," -t",[1291,22006,22007],{"class":3439}," rag",[1291,22009,22010],{"class":3439}," .\n",[73,22012,22013],{},[169,22014,22015],{},"Run the Docker Container:",[73,22017,22018],{},"Mount the data folder and expose port 8000.",[145,22020,22021,22053],{},[148,22022,22023,22024],{},"For Windows:\n",[3418,22025,22027],{"className":6347,"code":22026,"language":6349,"meta":23,"style":23},"docker run -v \"%cd%\u002Fdata:\u002Fapp\u002Fdata\" -p 8000:8000 rag\n",[3061,22028,22029],{"__ignoreMap":23},[1291,22030,22031,22033,22035,22038,22040,22043,22045,22047,22050],{"class":3427,"line":3428},[1291,22032,7331],{"class":6356},[1291,22034,17106],{"class":3439},[1291,22036,22037],{"class":3439}," -v",[1291,22039,3705],{"class":3435},[1291,22041,22042],{"class":3439},"%cd%\u002Fdata:\u002Fapp\u002Fdata",[1291,22044,3691],{"class":3435},[1291,22046,6412],{"class":3439},[1291,22048,22049],{"class":3439}," 8000:8000",[1291,22051,22052],{"class":3439}," rag\n",[148,22054,22055,22056],{},"For Linux\u002FMac:\n",[3418,22057,22059],{"className":6347,"code":22058,"language":6349,"meta":23,"style":23},"docker run -v \"$(pwd)\u002Fdata:\u002Fapp\u002Fdata\" -p 8000:8000 --env-file .env rag\n",[3061,22060,22061],{"__ignoreMap":23},[1291,22062,22063,22065,22067,22069,22072,22075,22077,22080,22082,22084,22086,22089,22092],{"class":3427,"line":3428},[1291,22064,7331],{"class":6356},[1291,22066,17106],{"class":3439},[1291,22068,22037],{"class":3439},[1291,22070,22071],{"class":3435}," 
\"$(",[1291,22073,22074],{"class":3812},"pwd",[1291,22076,713],{"class":3435},[1291,22078,22079],{"class":3439},"\u002Fdata:\u002Fapp\u002Fdata",[1291,22081,3691],{"class":3435},[1291,22083,6412],{"class":3439},[1291,22085,22049],{"class":3439},[1291,22087,22088],{"class":3439}," --env-file",[1291,22090,22091],{"class":3439}," .env",[1291,22093,22052],{"class":3439},[73,22095,22096],{},"This will start the pipeline and the UI for asking questions.",[3189,22098,22100],{"id":22099},"step-8-querying-the-pipeline","Step 8: Querying the Pipeline",[73,22102,22103,22104,22107,22108,694],{},"Once your service is running on your chosen host and port (by default, ",[3061,22105,22106],{},"0.0.0.0:8000","), you can test the service using ",[3061,22109,17507],{},[3206,22111,8603],{"id":8602},[73,22113,22114],{},"Make a POST request to list the files currently indexed:",[3418,22116,22118],{"className":6347,"code":22117,"language":6349,"meta":23,"style":23},"curl -X 'POST' \\\n  'http:\u002F\u002F0.0.0.0:8000\u002Fv2\u002Flist_documents' \\\n  -H 'accept: *\u002F*' \\\n  -H 'Content-Type: application\u002Fjson'\n",[3061,22119,22120,22137,22149,22163],{"__ignoreMap":23},[1291,22121,22122,22124,22127,22129,22132,22134],{"class":3427,"line":3428},[1291,22123,17507],{"class":6356},[1291,22125,22126],{"class":3439}," -X",[1291,22128,6415],{"class":3435},[1291,22130,22131],{"class":3439},"POST",[1291,22133,3436],{"class":3435},[1291,22135,22136],{"class":3431}," \\\n",[1291,22138,22139,22142,22145,22147],{"class":3427,"line":24},[1291,22140,22141],{"class":3435},"  '",[1291,22143,22144],{"class":3439},"http:\u002F\u002F0.0.0.0:8000\u002Fv2\u002Flist_documents",[1291,22146,3436],{"class":3435},[1291,22148,22136],{"class":3431},[1291,22150,22151,22154,22156,22159,22161],{"class":3427,"line":675},[1291,22152,22153],{"class":3439},"  -H",[1291,22155,6415],{"class":3435},[1291,22157,22158],{"class":3439},"accept: 
*\u002F*",[1291,22160,3436],{"class":3435},[1291,22162,22136],{"class":3431},[1291,22164,22165,22167,22169,22172],{"class":3427,"line":3542},[1291,22166,22153],{"class":3439},[1291,22168,6415],{"class":3435},[1291,22170,22171],{"class":3439},"Content-Type: application\u002Fjson",[1291,22173,5188],{"class":3435},[73,22175,22176],{},"You should receive a response similar to:",[3418,22178,22180],{"className":8621,"code":22179,"language":8623,"meta":23,"style":23},"[{\"modified_at\": 1715765613, \"owner\": \"saksham\", \"path\": \"data\u002F20230203_alphabet_10K.pdf\", \"seen_at\": 1715768762}]\n",[3061,22181,22182],{"__ignoreMap":23},[1291,22183,22184,22186,22188,22190,22192,22194,22197,22199,22201,22203,22205,22207,22209,22211,22213,22215,22217,22219,22221,22223,22225,22228,22230,22232,22234,22236,22238,22240,22243],{"class":3427,"line":3428},[1291,22185,8630],{"class":3435},[1291,22187,3691],{"class":3435},[1291,22189,8647],{"class":7739},[1291,22191,3691],{"class":3435},[1291,22193,4390],{"class":3435},[1291,22195,22196],{"class":3451}," 1715765613",[1291,22198,3566],{"class":3435},[1291,22200,3705],{"class":3435},[1291,22202,8661],{"class":7739},[1291,22204,3691],{"class":3435},[1291,22206,4390],{"class":3435},[1291,22208,3705],{"class":3435},[1291,22210,7342],{"class":3439},[1291,22212,3691],{"class":3435},[1291,22214,3566],{"class":3435},[1291,22216,3705],{"class":3435},[1291,22218,8679],{"class":7739},[1291,22220,3691],{"class":3435},[1291,22222,4390],{"class":3435},[1291,22224,3705],{"class":3435},[1291,22226,22227],{"class":3439},"data\u002F20230203_alphabet_10K.pdf",[1291,22229,3691],{"class":3435},[1291,22231,3566],{"class":3435},[1291,22233,3705],{"class":3435},[1291,22235,8697],{"class":7739},[1291,22237,3691],{"class":3435},[1291,22239,4390],{"class":3435},[1291,22241,22242],{"class":3451}," 1715768762",[1291,22244,8707],{"class":3435},[73,22246,8710],{},[3206,22248,8714],{"id":8713},[73,22250,22251],{},"Test the retrieval-augmented generation (RAG) 
capability by asking a question about a table within a report. For example, run the following command:",[3418,22253,22255],{"className":6347,"code":22254,"language":6349,"meta":23,"style":23},"curl -X 'POST' \\\n  'http:\u002F\u002F0.0.0.0:8000\u002Fv2\u002Fanswer' \\\n  -H 'accept: *\u002F*' \\\n  -H 'Content-Type: application\u002Fjson' \\\n  -d '{\n  \"prompt\": \"How much was Operating lease cost in 2021?\" \n}'\n",[3061,22256,22257,22271,22282,22294,22306,22316,22321],{"__ignoreMap":23},[1291,22258,22259,22261,22263,22265,22267,22269],{"class":3427,"line":3428},[1291,22260,17507],{"class":6356},[1291,22262,22126],{"class":3439},[1291,22264,6415],{"class":3435},[1291,22266,22131],{"class":3439},[1291,22268,3436],{"class":3435},[1291,22270,22136],{"class":3431},[1291,22272,22273,22275,22278,22280],{"class":3427,"line":24},[1291,22274,22141],{"class":3435},[1291,22276,22277],{"class":3439},"http:\u002F\u002F0.0.0.0:8000\u002Fv2\u002Fanswer",[1291,22279,3436],{"class":3435},[1291,22281,22136],{"class":3431},[1291,22283,22284,22286,22288,22290,22292],{"class":3427,"line":675},[1291,22285,22153],{"class":3439},[1291,22287,6415],{"class":3435},[1291,22289,22158],{"class":3439},[1291,22291,3436],{"class":3435},[1291,22293,22136],{"class":3431},[1291,22295,22296,22298,22300,22302,22304],{"class":3427,"line":3542},[1291,22297,22153],{"class":3439},[1291,22299,6415],{"class":3435},[1291,22301,22171],{"class":3439},[1291,22303,3436],{"class":3435},[1291,22305,22136],{"class":3431},[1291,22307,22308,22311,22313],{"class":3427,"line":3547},[1291,22309,22310],{"class":3439},"  -d",[1291,22312,6415],{"class":3435},[1291,22314,22315],{"class":3439},"{\n",[1291,22317,22318],{"class":3427,"line":3572},[1291,22319,22320],{"class":3439},"  \"prompt\": \"How much was Operating lease cost in 2021?\" \n",[1291,22322,22323,22325],{"class":3427,"line":3614},[1291,22324,9671],{"class":3439},[1291,22326,5188],{"class":3435},[73,22328,22329],{},"You should receive a correct response such 
as:",[3418,22331,22333],{"className":8621,"code":22332,"language":8623,"meta":23,"style":23},"{\"response\": \"$2,699 million\"}\n",[3061,22334,22335],{"__ignoreMap":23},[1291,22336,22337,22339,22341,22343,22345,22347,22349,22352,22354],{"class":3427,"line":3428},[1291,22338,8770],{"class":3435},[1291,22340,3691],{"class":3435},[1291,22342,4991],{"class":7739},[1291,22344,3691],{"class":3435},[1291,22346,4390],{"class":3435},[1291,22348,3705],{"class":3435},[1291,22350,22351],{"class":3439},"$2,699 million",[1291,22353,3691],{"class":3435},[1291,22355,4441],{"class":3435},[73,22357,22358],{},"The initial LLM parsing step allows the system to include the relevant table data in the context, enabling accurate answers where other RAG applications might struggle.",[22360,22361],"hr",{},[140,22363,22365],{"id":22364},"understanding-your-rag-pipeline","Understanding your RAG pipeline",[665,22367,22368],{},[148,22369,22370],{},[169,22371,22372],{},"Data Ingestion",[73,22374,22375,22376,22378],{},"Pathway reads files from ",[3061,22377,3928],{}," as binary streams, ready for live updates.",[665,22380,22381],{"start":24},[148,22382,22383],{},[169,22384,22385],{},"Document Parsing",[73,22387,22388],{},"PDF pages or PPTX slides are converted to images, and the LLM is prompted to extract text, tables, etc.",[665,22390,22391],{"start":675},[148,22392,22393],{},[169,22394,22395],{},"Chunking & Embedding",[73,22397,22398],{},"The parsed text is split into semantic chunks and embedded, with the framework storing these embeddings in an integrated vector store.",[665,22400,22401],{"start":3542},[148,22402,22403],{},[169,22404,22405],{},"Indexing & Querying",[73,22407,22408],{},"For queries, the framework retrieves relevant chunks, then an LLM composes the final answer. 
The entire flow is “live,” so newly ingested docs are instantly queryable.",[140,22410,22412],{"id":22411},"slideparser-parameters-overview","SlideParser Parameters Overview",[73,22414,22415,22416,4390],{},"Here are the parameters for the ",[169,22417,22418],{},[77,22419,20996],{"href":22420},"\u002Fdevelopers\u002Fapi-docs\u002Fpathway-xpacks-llm\u002Fparsers#pathway.xpacks.llm.parsers.SlideParser",[3418,22422,22424],{"className":3420,"code":22423,"language":3422,"meta":23,"style":23},"class SlideParser(llm=DEFAULT_VISION_LLM, parse_prompt=prompts.DEFAULT_IMAGE_PARSE_PROMPT, detail_parse_schema=None, include_schema_in_text=False, intermediate_image_format='jpg', image_size=(1280, 720), run_mode='parallel', retry_strategy=ExponentialBackoffRetryStrategy(max_retries=6), cache_strategy=None)\n",[3061,22425,22426],{"__ignoreMap":23},[1291,22427,22428,22430,22433,22435,22437,22439,22442,22444,22447,22449,22451,22453,22455,22457,22460,22463,22466,22468,22471,22473,22475,22478,22480,22482,22485,22488,22491,22493,22496,22498,22501,22503,22505,22508,22510,22512,22514,22516,22518,22520,22522,22524,22526,22528,22530],{"class":3427,"line":3428},[1291,22429,16356],{"class":7739},[1291,22431,22432],{"class":6356}," SlideParser",[1291,22434,3816],{"class":3435},[1291,22436,3627],{"class":3819},[1291,22438,3738],{"class":3435},[1291,22440,22441],{"class":3431},"DEFAULT_VISION_LLM",[1291,22443,3566],{"class":3435},[1291,22445,22446],{"class":3819}," parse_prompt",[1291,22448,3738],{"class":3435},[1291,22450,4702],{"class":6356},[1291,22452,694],{"class":3435},[1291,22454,4506],{"class":3457},[1291,22456,3566],{"class":3435},[1291,22458,22459],{"class":3819}," detail_parse_schema",[1291,22461,22462],{"class":3435},"=None,",[1291,22464,22465],{"class":3819}," include_schema_in_text",[1291,22467,6976],{"class":3435},[1291,22469,22470],{"class":3819}," 
intermediate_image_format",[1291,22472,3738],{"class":3435},[1291,22474,3436],{"class":3435},[1291,22476,22477],{"class":3439},"jpg",[1291,22479,3436],{"class":3435},[1291,22481,3566],{"class":3435},[1291,22483,22484],{"class":3819}," image_size",[1291,22486,22487],{"class":3435},"=(",[1291,22489,22490],{"class":3451},"1280",[1291,22492,3566],{"class":3435},[1291,22494,22495],{"class":3451}," 720",[1291,22497,5639],{"class":3435},[1291,22499,22500],{"class":3819}," run_mode",[1291,22502,3738],{"class":3435},[1291,22504,3436],{"class":3435},[1291,22506,22507],{"class":3439},"parallel",[1291,22509,3436],{"class":3435},[1291,22511,3566],{"class":3435},[1291,22513,10861],{"class":3819},[1291,22515,3738],{"class":3435},[1291,22517,4219],{"class":6356},[1291,22519,3816],{"class":3435},[1291,22521,4224],{"class":3819},[1291,22523,3738],{"class":3435},[1291,22525,4229],{"class":3451},[1291,22527,5639],{"class":3435},[1291,22529,10937],{"class":3819},[1291,22531,22532],{"class":3435},"=None)\n",[73,22534,22535,4390],{},[169,22536,22537],{},"Parameters",[145,22539,22540,22545,22550,22556,22562,22567,22573,22579,22585],{},[148,22541,22542,22544],{},[169,22543,3627],{},": The LLM used for parsing images (must support image inputs).",[148,22546,22547,22549],{},[169,22548,21827],{},": Prompt fed to the LLM to guide parsing.",[148,22551,22552,22555],{},[169,22553,22554],{},"detail_parse_schema",": An optional Pydantic schema for a deeper second-pass parse (if needed).",[148,22557,22558,22561],{},[169,22559,22560],{},"include_schema_in_text",": If True, merges the schema parse into the text output—handy for search or referencing.",[148,22563,22564,22566],{},[169,22565,19795],{},": Format for intermediate slides (“jpg” by default).",[148,22568,22569,22572],{},[169,22570,22571],{},"image_size",": Tuple of (width, height) in pixels for generating images.",[148,22574,22575,22578],{},[169,22576,22577],{},"run_mode",": \"parallel\" or \"sequential.\" Parallel is faster, but sequential 
can reduce timeouts or memory issues.",[148,22580,22581,22584],{},[169,22582,22583],{},"retry_strategy",": Recommended for robust calls to proprietary LLMs.",[148,22586,22587,22589],{},[169,22588,19820],{},": Optional caching mechanism for speed-ups.",[73,22591,22592,22593,694],{},"For more details, visit the ",[77,22594,22595],{"href":22420},"SlideParser API documentation",[140,22597,22599],{"id":22598},"ingesting-millions-of-pdfs-why-gemini-20-is-a-game-changer","Ingesting Millions of PDFs: Why Gemini 2.0 is a Game Changer",[73,22601,22602],{},"A common pain point in real-world doc ingestion is handling non-trivial layouts—tables, images, multilingual text, etc. Many approaches require orchestrating multiple specialized models for layout detection and table parsing (e.g., Kubernetes clusters with multiple GPU services). This can get expensive and complicated, particularly when scaling to millions of documents.",[73,22604,22605],{},"Gemini 2.0 flips that equation: it merges near-perfect OCR and chunking performance with far better cost-efficiency than older solutions. The Pathway Live Data Framework then syncs these parsed documents continuously, keeping your retrieval pipeline accurate even with large or fast-changing data volumes.",[3189,22607,22609],{"id":22608},"more-on-table-extraction","More on Table Extraction",[145,22611,22612,22618],{},[148,22613,22614,22617],{},[169,22615,22616],{},"Table Extraction"," remains the toughest challenge. Real-world PDF table layouts are unpredictable. Gemini 2.0 often handles the content well, though some minor “structural” variations can appear.",[148,22619,22620,22623],{},[169,22621,22622],{},"Bounding Boxes",": For exact positions within a PDF (e.g. highlighting an original location), bounding boxes aren't yet perfectly supported by Gemini's vision understanding. 
However, this remains a solvable gap as LLMs gain more robust layout training.",[73,22625,22626,22627,22630],{},"In short, Gemini 2.0 plus Pathway brings us closer to a future where ",[169,22628,22629],{},"document parsing is nearly effortless",", bridging the gap between high accuracy, streamlined orchestration and cost feasibility.",[22360,22632],{},[140,22634,22636],{"id":22635},"key-takeaways-conclusion","Key Takeaways & Conclusion",[145,22638,22639,22645,22651],{},[148,22640,22641,22644],{},[169,22642,22643],{},"LLM-Based Vision Parsing",": SlideParser plus Gemini 2.0 enables single-step handling of text, images, tables, and layout elements—without the hassle of multiple OCR models.",[148,22646,22647,22650],{},[169,22648,22649],{},"Integrated Pipeline",": Pathway Live Data Framework unifies “write path” ingestion (doc-to-chunks indexing) and “read path” querying (RAG), reducing complexity and overhead.",[148,22652,22653,22656],{},[169,22654,22655],{},"Scale & Affordability",": Gemini 2.0's improved pricing and accuracy make large-scale PDF ingestion far more economical than older solutions.\nFuture-Proofing: While bounding box accuracy is still evolving, the trend points toward more robust layout understanding from next-gen LLMs.",[73,22658,22659],{},"By leveraging Gemini 2.0 for OCR-like parsing and Pathway Live Data Framework for real-time ingestion, live indexing, and dynamic retrieval, you can reduce complexity in your tech stack while powering accurate, context-aware decisions. Whether you're ingesting millions of pages or handling a steady trickle of updates, this pipeline strategy ensures data remains synchronized and analysis-ready—without costly stitching of multiple microservices.",[140,22661,20822],{"id":20821},[73,22663,8858],{},[2949,22665],{},[73,22667,22668,22669,22671],{},"If you'd like to explore more, check out the ",[77,22670,13171],{"href":692},", or feel free to reach out about customizing this approach for your workflow. 
Happy building!",[5019,22673,22674],{},"html pre.shiki code .s5Dmg, html code.shiki .s5Dmg{--shiki-default:#FFCB6B}html pre.shiki code .sfyAc, html code.shiki .sfyAc{--shiki-default:#C3E88D}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html pre.shiki code .sdLwU, html code.shiki .sdLwU{--shiki-default:#82AAFF}html pre.shiki code .s-wAU, html code.shiki .s-wAU{--shiki-default:#F07178}html pre.shiki code .sAklC, html code.shiki .sAklC{--shiki-default:#89DDFF}html pre.shiki code .sJ14y, html code.shiki .sJ14y{--shiki-default:#C792EA}html pre.shiki code .sbqyR, html code.shiki .sbqyR{--shiki-default:#FF9CAC}html pre.shiki code .sx098, html code.shiki .sx098{--shiki-default:#F78C6C}html pre.shiki code .s6cf3, html code.shiki .s6cf3{--shiki-default:#89DDFF;--shiki-default-font-style:italic}html pre.shiki code .s0W1g, html code.shiki .s0W1g{--shiki-default:#BABED8}html pre.shiki code .saEQR, html code.shiki .saEQR{--shiki-default:#676E95;--shiki-default-font-style:italic}html pre.shiki code .s7ZW3, html code.shiki 
.s7ZW3{--shiki-default:#BABED8;--shiki-default-font-style:italic}",{"title":23,"searchDepth":24,"depth":24,"links":22676},[22677,22684,22697,22698,22699,22702,22703],{"id":20962,"depth":24,"text":20963,"children":22678},[22679,22680,22681,22682,22683],{"id":20966,"depth":675,"text":20967},{"id":20984,"depth":675,"text":20985},{"id":21006,"depth":675,"text":21007},{"id":21018,"depth":675,"text":21019},{"id":21036,"depth":675,"text":21037},{"id":21054,"depth":24,"text":21055,"children":22685},[22686,22687,22688,22689,22691,22692,22693,22695,22696],{"id":21115,"depth":675,"text":21118},{"id":21125,"depth":675,"text":21126},{"id":21165,"depth":675,"text":21166},{"id":21184,"depth":675,"text":22690},"Step 3: Modify Dockerfile",{"id":7694,"depth":675,"text":8910},{"id":21863,"depth":675,"text":21864},{"id":21910,"depth":675,"text":22694},"Step 6: Update the .env File with Your API Keys",{"id":21948,"depth":675,"text":21949},{"id":22099,"depth":675,"text":22100},{"id":22364,"depth":24,"text":22365},{"id":22411,"depth":24,"text":22412},{"id":22598,"depth":24,"text":22599,"children":22700},[22701],{"id":22608,"depth":675,"text":22609},{"id":22635,"depth":24,"text":22636},{"id":20821,"depth":24,"text":20822},"Learn how Gemini 2.0 and Pathway Live Data Framework efficiently streamline document ingestion, OCR and data 
analysis.",{"layout":90,"date":22706,"thumbnail":22707,"tags":22709,"coauthors":22710,"hidden":35},"2025-02-20",{"src":22708,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fgemini-2-th.png",[90,6268],[22711],{"name":8945,"description":8946,"img":10,"provider":11,"linkedin":8947},"\u002Fframework\u002Fblog\u002Fgemini2-document-ingestion-and-analytics",{"title":20927,"description":22704},{"loc":22712},"framework\u002Fblog\u002F878.gemini2-document-ingestion-and-analytics","DCV4wWcwx2URiQuyAEsMLQN8WxWq389s6LGzC9JPXEQ",{"id":22718,"title":22719,"author":22720,"body":22725,"description":23026,"extension":27,"meta":23027,"navigation":35,"path":23035,"seo":23036,"sitemap":23037,"stem":23038,"__hash__":23039},"content\u002Fframework\u002Fblog\u002F879.pathway-apache-iceberg-connectors.md","Apache Iceberg Connectors for Real-Time Data Pipelines with Pathway Live Data Framework",{"name":22721,"description":22722,"img":22723,"provider":11,"linkedin":22724},"Shlok Srivastava","Lead Engineer at Pine Labs","\u002Fassets\u002Fblog\u002Favatars\u002Fshlok-srivastava-av.jpg","https:\u002F\u002Fwww.linkedin.com\u002Fin\u002Fshlok15\u002F",{"type":13,"value":22726,"toc":23017},[22727,22732,22745,22756,22760,22763,22795,22809,22812,22832,22835,22839,22843,22846,22897,22900,22906,22910,22917,22960,22986,22990,22997,23010,23014],[1141,22728],{":zoomable":1143,"alt":22729,"className":22730,"src":22731},"Apache Iceberg + Pathway banner",[133],"\u002Fassets\u002Fcontent\u002Fblog\u002Fapache-iceberg-banner.png",[73,22733,22734,22735,22737,22738,22741,22742,22744],{},"We’re excited to share that ",[169,22736,712],{}," has officially released ",[169,22739,22740],{},"Apache Iceberg connectors",", enabling you to seamlessly integrate and manage your data in Iceberg with the full power of Pathway’s ",[169,22743,1279],{},". 
These connectors make it simpler than ever to harness Iceberg’s flexible table format while taking advantage of Pathway’s real-time computation engine.",[73,22746,22747,22748,22752,22753,694],{},"You'll find a comprehensive summary below. If you'd like to directly start the implementation, head over to the ",[77,22749,22751],{"href":22750},"\u002Fdevelopers\u002Fapi-docs\u002Fpathway-io\u002Ficeberg#pwioiceberg","documentation here",". To schedule a call with Pathway regarding its capabilities with Apache Iceberg, book a slot ",[77,22754,3147],{"href":22755},"#transform-your-data-pipelines-with-confidence",[140,22757,22759],{"id":22758},"benefits-of-using-pathways-iceberg-connectors","Benefits of Using Pathway’s Iceberg Connectors",[73,22761,22762],{},"Below are some compelling reasons for using the Pathway Iceberg connectors:",[665,22764,22765,22771,22777,22783,22789],{},[148,22766,22767,22770],{},[169,22768,22769],{},"Near Real-Time Insights",":\nThe streaming capabilities let you capture incremental changes instantly, making it perfect for event-driven use cases or live dashboards.",[148,22772,22773,22776],{},[169,22774,22775],{},"Simplicity & Efficiency",":\nSetting up your Iceberg read\u002Fwrite logic takes only a handful of lines in Python, reducing the complexity of your pipeline.",[148,22778,22779,22782],{},[169,22780,22781],{},"Scalability",":\nPathway’s distributed engine and Iceberg’s optimized file format let you handle very large datasets without sacrificing performance.",[148,22784,22785,22788],{},[169,22786,22787],{},"Unified Data Workflows",":\nIntegrate multiple data sources—like CSV, NATS, Kafka, Postgres, or MongoDB—and funnel all transformations into a single Iceberg table for easy query and analytics.",[148,22790,22791,22794],{},[169,22792,22793],{},"Ideal for AI and Machine Learning",":\nLow-latency updates keep your models current. 
Continuous training or inference becomes straightforward when your pipeline is always up to date.",[73,22796,22797,22798,22801,22802,3126,22805,22808],{},"By introducing ",[169,22799,22800],{},"built-in connectors"," for Iceberg, Pathway extends its commitment to ",[169,22803,22804],{},"scalable data ingestion",[169,22806,22807],{},"real-time analytics",", letting you tap into the power of Iceberg in just a few lines of code.",[73,22810,22811],{},"Here’s a quick look at what these new connectors bring:",[145,22813,22814,22820,22826],{},[148,22815,22816,22819],{},[169,22817,22818],{},"Static and Streaming Modes",": Read data once (static) or continuously monitor for changes (streaming).",[148,22821,22822,22825],{},[169,22823,22824],{},"Two-Way Integration",": Not only can you read from Iceberg, but you can also write changes back into Iceberg storage.",[148,22827,22828,22831],{},[169,22829,22830],{},"Schema-Driven",": The connectors rely on Python-based schema definitions, making it easy to pick and choose which fields you need.",[73,22833,22834],{},"The sections below go deeper into some key details about the new connectors.",[140,22836,22838],{"id":22837},"key-implementation-details","Key Implementation Details",[3189,22840,22842],{"id":22841},"reading-from-iceberg","Reading from Iceberg",[73,22844,22845],{},"The Pathway Iceberg connector enables efficient data retrieval from Iceberg tables. Here’s how it works:",[145,22847,22848,22862,22884],{},[148,22849,22850,22853,22854,22857,22858,22861],{},[169,22851,22852],{},"Static or Streaming Mode",": The connectors support two modes. ",[169,22855,22856],{},"Static Mode"," reads your existing data exactly once—ideal for batch analyses. 
",[169,22859,22860],{},"Streaming Mode"," continuously monitors updates to your Iceberg tables, capturing row additions and deletions in real time.",[148,22863,22864,22867,22868,14364,22871,14364,22874,14364,22876,22879,22880,22883],{},[169,22865,22866],{},"Schema Definition",": You can specify each column’s type (e.g., ",[3061,22869,22870],{},"int",[3061,22872,22873],{},"bool",[3061,22875,7171],{},[3061,22877,22878],{},"float",") in a Python class. You can also mark certain columns as primary keys using ",[3061,22881,22882],{},"pw.column_definition(primary_key=True)"," to uniquely identify rows, especially important in streaming mode. Head over to the developer documentation for more details. ",[148,22885,22886,22889,22890,15739,22893,22896],{},[169,22887,22888],{},"Integration with Pathway",": The connector ",[169,22891,22892],{},"automatically",[169,22894,22895],{},"reflects changes"," in your computational graph. Any new data that appears in the underlying Iceberg table is immediately visible in Pathway’s pipeline.",[73,22898,22899],{},"Once you’ve set up your reading mechanism, you’re ready to incorporate this data into your transformations or AI pipelines. ",[73,22901,22902,22903,22905],{},"Next, let’s examine how to ",[169,22904,9700],{}," your processed data back to Iceberg.",[3189,22907,22909],{"id":22908},"writing-to-iceberg","Writing to Iceberg",[73,22911,22912,22913,22916],{},"After reading and processing your data, you may want to ",[169,22914,22915],{},"publish your results"," into an Iceberg table. Below are some of the highlights of the write connectors, head to the API documentation for a full list of configuration knobs that you can tune. 
",[145,22918,22919,22925,22943],{},[148,22920,22921,22924],{},[169,22922,22923],{},"Automatic Table Creation",": If you haven’t created the table or namespace yet, Pathway can do it for you, inferring the schema from the table you’re writing.",[148,22926,22927,22930,22931,22934,22935,22938,22939,22942],{},[169,22928,22929],{},"Change Tracking",": Pathway uses two special columns—",[3061,22932,22933],{},"time"," (representing the minibatch of computation) and ",[3061,22936,22937],{},"diff"," (indicating whether a row is being added or removed)—to accurately mirror ",[169,22940,22941],{},"all real-time changes"," in your dataset.",[148,22944,22945,22948,22949,22952,22953,22956,22957,694],{},[169,22946,22947],{},"Commit Frequency",": Configure ",[3061,22950,22951],{},"min_commit_frequency"," to manage how often Pathway writes changes to storage, balancing ",[169,22954,22955],{},"real-time responsiveness"," with ",[169,22958,22959],{},"I\u002FO overhead",[73,22961,22962,22963,22966,22967,14364,22969,14364,22971,14364,22973,22975,22976,14364,22979,14368,22982,22985],{},"The connectors support all ",[169,22964,22965],{},"Pathway primitive types"," (e.g., ",[3061,22968,22873],{},[3061,22970,22870],{},[3061,22972,22878],{},[3061,22974,7171],{},") which are then directly mapped to corresponding Iceberg types. ",[3061,22977,22978],{},"Duration",[3061,22980,22981],{},"Naive DateTime",[3061,22983,22984],{},"UTC DateTime"," are also supported, ensuring broad coverage for typical use cases.",[140,22987,22989],{"id":22988},"whats-next","What's Next?",[73,22991,22992,22993,22996],{},"In the coming weeks, we’ll publish an ",[169,22994,22995],{},"in-depth tutorial"," that dives deeper into advanced configurations, best practices, and performance tuning for large-scale use cases. 
",[73,22998,22999,23002,23003,23006,23007,23009],{},[169,23000,23001],{},"Ready to leverage the power of Apache Iceberg with Pathway?"," Get started with a Free Pathway Live Data Framework Scale or Enterprise License and follow the instructions on this ",[77,23004,23005],{"href":22750},"link",". Or reach out to us at ",[77,23008,20843],{"href":20842}," to discuss how real-time, high-volume data processing can transform your analytics stack. We can’t wait to see what you’ll build!",[140,23011,23013],{"id":23012},"transform-your-data-pipelines-with-confidence","Transform Your Data Pipelines with Confidence",[73,23015,23016],{},"Are you eager to accelerate your data processing workflows with Apache Iceberg connectors?\nPathway is trusted by industry leaders such as NATO and Intel, and is natively available on both AWS and Azure Marketplaces. Pathway’s experts are here to help. Get a 15-minute, no-obligation consultation focused on your unique data challenges.",{"title":23,"searchDepth":24,"depth":24,"links":23018},[23019,23020,23024,23025],{"id":22758,"depth":24,"text":22759},{"id":22837,"depth":24,"text":22838,"children":23021},[23022,23023],{"id":22841,"depth":675,"text":22842},{"id":22908,"depth":675,"text":22909},{"id":22988,"depth":24,"text":22989},{"id":23012,"depth":24,"text":23013},"Seamlessly integrate & manage your data in Apache Iceberg with the full power of Pathway’s Pathway Live Data Framework",{"layout":90,"date":23028,"thumbnail":23029,"tags":23031,"coauthors":23032,"hidden":35},"2025-02-11",{"src":23030,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fazure-iceberg-th.png",[90,17824],[23033],{"name":17837,"description":23034,"img":17839,"provider":11,"linkedin":17840},"Lead Software Research Engineer at 
Pathway","\u002Fframework\u002Fblog\u002Fpathway-apache-iceberg-connectors",{"title":22719,"description":23026},{"loc":23035},"framework\u002Fblog\u002F879.pathway-apache-iceberg-connectors","MwKS3Kqr3JSJGOm511HB53A6dos27NXDG9cJg-BjuZ8",{"id":23041,"title":23042,"author":23043,"body":23044,"description":23790,"extension":27,"meta":23791,"navigation":35,"path":23796,"seo":23797,"sitemap":23798,"stem":23799,"__hash__":23800},"content\u002Fframework\u002Fblog\u002F880.deepseek-ollama.md","Real-Time AI Pipeline with DeepSeek, Ollama and Pathway",{"id":7342,"url":7343,"name":7344,"description":7345,"img":7346,"provider":11,"linkedin":7347},{"type":13,"value":23045,"toc":23775},[23046,23049,23052,23064,23068,23110,23120,23124,23132,23152,23155,23181,23185,23211,23217,23223,23350,23353,23363,23367,23374,23382,23395,23403,23417,23426,23440,23447,23474,23477,23481,23484,23488,23493,23511,23519,23542,23553,23557,23565,23579,23587,23597,23604,23608,23611,23614,23617,23620,23693,23699,23703,23712,23735,23738,23745,23747,23749,23751,23767,23772],[68,23047,23042],{"id":23048},"real-time-ai-pipeline-with-deepseek-ollama-and-pathway",[73,23050,23051],{},"Retrieval-Augmented Generation (RAG) lets you build question-answering systems that rely on your own private documents rather than generic web data. The challenge is that most RAG\u002FAI pipelines rely on LLM APIs that send your data, or at least a part of it, to the LLM provider which can be a non-starter if you handle sensitive data (trade secrets, confidential IP, or GDPR-protected information). Fortunately, there is a solution to keep your data private: deploying a local LLM. 
A private RAG pipeline keeps your data on-premise: no external services, no data leaves your control.",[73,23053,23054,23055,23057,23058,23063],{},"In this guide, you'll learn how to use the ",[77,23056,1279],{"href":711}," to create a real-time RAG pipeline on top of DeepSeek R1, an open-source reasoning tool running locally with ",[77,23059,23062],{"href":23060,"rel":23061},"https:\u002F\u002Follama.ai\u002F",[81],"Ollama",", a lightweight framework for running local AI models.",[140,23065,23067],{"id":23066},"_1-why-local-deployment-why-deepseek-r1","1. Why Local Deployment & Why DeepSeek R1?",[145,23069,23070,23076,23082,23088,23094,23100],{},[148,23071,23072,23075],{},[169,23073,23074],{},"Complete Data Privacy",": By running a local LLM, none of your data ever leaves your servers. This is crucial for protecting trade secrets, GDPR-sensitive information, and other confidential materials.",[148,23077,23078,23081],{},[169,23079,23080],{},"Strong Reasoning",": DeepSeek-R1 is a first-generation reasoning model offering performance on par with OpenAI-o1 across math, code, and complex reasoning tasks, including six dense models distilled from DeepSeek-R1 based on Llama and Qwen.",[148,23083,23084,23087],{},[169,23085,23086],{},"Flexibility in Model Size",": DeepSeek R1 provides multiple variants ranging from 1.5B parameters (lightweight) to significantly larger sizes (for more complex tasks). You can pick a sweet spot based on your hardware and performance needs.",[148,23089,23090,23093],{},[169,23091,23092],{},"Pathway Live Data Framework for Real-Time RAG",": Pathway Live Data Framework syncs and indexes your data dynamically (including documents from local folders, SharePoint, or Google Drive) and features an integrated vector store. 
It orchestrates the entire pipeline: from reading documents, embedding them for similarity search, retrieving relevant paragraphs, and feeding them into DeepSeek R1—all in a single framework.",[148,23095,23096,23099],{},[169,23097,23098],{},"Predictable, Adaptable Performance",": With DeepSeek R1 on-prem, you aren’t subject to changing API performance or model updates from a third party. You can also potentially fine-tune or customize the model for your domain.",[148,23101,23102,23105,23106,23109],{},[169,23103,23104],{},"Ollama for Local Inference",": Ollama lets you run open-source models (including all DeepSeek R1 variants) on your machine (CPU or GPU), with a simple ",[3061,23107,23108],{},"ollama serve"," command. This means your data never leaves your server.",[73,23111,23112,23113,14364,23116,23119],{},"All together, this stack offers a ",[169,23114,23115],{},"fully private",[169,23117,23118],{},"real-time"," question-answering\u002FRAG pipeline that is easy to configure and scale.",[140,23121,23123],{"id":23122},"_2-cloning-the-private-rag-example","2. Cloning the Private RAG Example",[73,23125,23126,23127,4390],{},"Let's get started with setting up your private RAG pipeline. The easiest way is to use the example configuration, which you can find in the llm-app repository. 
First, clone the ",[77,23128,23131],{"href":23129,"rel":23130},"https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fllm-app\u002Ftree\u002Fmain",[81],"Pathway llm-app repository",[3418,23133,23135],{"className":6347,"code":23134,"language":6349,"meta":23,"style":23},"git clone https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fllm-app.git\ncd llm-app\u002Ftemplates\u002Fprivate_rag\n",[3061,23136,23137,23145],{"__ignoreMap":23},[1291,23138,23139,23141,23143],{"class":3427,"line":3428},[1291,23140,21142],{"class":6356},[1291,23142,21145],{"class":3439},[1291,23144,21148],{"class":3439},[1291,23146,23147,23149],{"class":3427,"line":24},[1291,23148,21178],{"class":3812},[1291,23150,23151],{"class":3439}," llm-app\u002Ftemplates\u002Fprivate_rag\n",[73,23153,23154],{},"Inside this folder, you’ll see:",[145,23156,23157,23161,23166,23172,23176],{},[148,23158,23159,7586],{},[3061,23160,7585],{},[148,23162,23163,23165],{},[3061,23164,7570],{},", the file containing configuration of the pipeline, like LLM models, sources or server address;",[148,23167,23168,7597,23170,7601],{},[3061,23169,7596],{},[3061,23171,7600],{},[148,23173,23174,7607],{},[3061,23175,7606],{},[148,23177,23178,23180],{},[3061,23179,5185],{},", a sample folder containing a PDF (a content license agreement) that you'll use in this demonstration",[140,23182,23184],{"id":23183},"_3-what-youll-need-to-build-a-local-rag-system","3. What You’ll Need to Build a Local RAG System",[665,23186,23187,23191,23196,23205],{},[148,23188,23189],{},[169,23190,1279],{},[148,23192,23193,23195],{},[169,23194,8931],{}," (if you’d like to run the final pipeline in a container)",[148,23197,23198,23200,23201],{},[169,23199,23062],{}," installed on your machine: ",[77,23202,23203],{"href":23203,"rel":23204},"https:\u002F\u002Follama.com\u002Fdownload",[81],[148,23206,7362,23207,23210],{},[169,23208,23209],{},"DeepSeek R1"," model pulled locally",[140,23212,23214,23215],{"id":23213},"_4-modifying-the-appyaml","4. 
Modifying the ",[3061,23216,7570],{},[73,23218,23219,23220,23222],{},"Open ",[3061,23221,7570],{}," and replace the default Mistral references with DeepSeek R1. Here’s the exact code snippet:",[3418,23224,23226],{"className":7720,"code":23225,"language":7722,"meta":23,"style":23},"$llm_model: \"ollama\u002Fdeepseek-r1:1.5b\"   # Switch to DeepSeek R1\n$llm: !pw.xpacks.llm.llms.LiteLLMChat\n  model: $llm_model\n  retry_strategy: !pw.udfs.ExponentialBackoffRetryStrategy\n    max_retries: 6\n  cache_strategy: !pw.udfs.DefaultCache {}\n  temperature: 0\n  api_base: \"http:\u002F\u002Fhost.docker.internal:11434\"  # if running inside Docker\nquestion_answerer: !pw.xpacks.llm.question_answering.BaseRAGQuestionAnswerer\n  llm: $llm\n  indexer: $document_store\n  search_topk: 8  # number of retrieved chunks\n",[3061,23227,23228,23245,23253,23262,23270,23278,23288,23296,23313,23321,23329,23337],{"__ignoreMap":23},[1291,23229,23230,23233,23235,23237,23240,23242],{"class":3427,"line":3428},[1291,23231,23232],{"class":3457},"$llm_model",[1291,23234,4390],{"class":3435},[1291,23236,3705],{"class":3435},[1291,23238,23239],{"class":3439},"ollama\u002Fdeepseek-r1:1.5b",[1291,23241,3691],{"class":3435},[1291,23243,23244],{"class":3673},"   # Switch to DeepSeek R1\n",[1291,23246,23247,23249,23251],{"class":3427,"line":24},[1291,23248,7832],{"class":3457},[1291,23250,4390],{"class":3435},[1291,23252,21283],{"class":7739},[1291,23254,23255,23257,23259],{"class":3427,"line":675},[1291,23256,7842],{"class":3457},[1291,23258,4390],{"class":3435},[1291,23260,23261],{"class":3439}," 
$llm_model\n",[1291,23263,23264,23266,23268],{"class":3427,"line":3542},[1291,23265,7856],{"class":3457},[1291,23267,4390],{"class":3435},[1291,23269,7861],{"class":7739},[1291,23271,23272,23274,23276],{"class":3427,"line":3547},[1291,23273,7866],{"class":3457},[1291,23275,4390],{"class":3435},[1291,23277,7871],{"class":3451},[1291,23279,23280,23282,23284,23286],{"class":3427,"line":3572},[1291,23281,7876],{"class":3457},[1291,23283,4390],{"class":3435},[1291,23285,7881],{"class":7739},[1291,23287,7884],{"class":3435},[1291,23289,23290,23292,23294],{"class":3427,"line":3614},[1291,23291,7889],{"class":3457},[1291,23293,4390],{"class":3435},[1291,23295,7894],{"class":3451},[1291,23297,23298,23301,23303,23305,23308,23310],{"class":3427,"line":3640},[1291,23299,23300],{"class":3457},"  api_base",[1291,23302,4390],{"class":3435},[1291,23304,3705],{"class":3435},[1291,23306,23307],{"class":3439},"http:\u002F\u002Fhost.docker.internal:11434",[1291,23309,3691],{"class":3435},[1291,23311,23312],{"class":3673},"  # if running inside Docker\n",[1291,23314,23315,23317,23319],{"class":3427,"line":3665},[1291,23316,8120],{"class":3457},[1291,23318,4390],{"class":3435},[1291,23320,8125],{"class":7739},[1291,23322,23323,23325,23327],{"class":3427,"line":3670},[1291,23324,8131],{"class":3457},[1291,23326,4390],{"class":3435},[1291,23328,8136],{"class":3439},[1291,23330,23331,23333,23335],{"class":3427,"line":3677},[1291,23332,8142],{"class":3457},[1291,23334,4390],{"class":3435},[1291,23336,8147],{"class":3439},[1291,23338,23339,23342,23344,23347],{"class":3427,"line":3877},[1291,23340,23341],{"class":3457},"  search_topk",[1291,23343,4390],{"class":3435},[1291,23345,23346],{"class":3451}," 8",[1291,23348,23349],{"class":3673},"  # number of retrieved 
chunks\n",[1141,23351],{":zoomable":1143,"alt":21118,"src":23352,"style":133},"\u002Fassets\u002Fcontent\u002Fblog\u002Farchitecture-diagram-deepseek.png",[73,23354,23355],{},[15804,23356,23357],{},[1291,23358,23362],{"className":23359},[23360,23361,2912],"text-center","w-full","Architecture Diagram showcasing a Real-Time AI pipeline powered by DeepSeek R1 and Pathway Live Data Framework",[140,23364,23366],{"id":23365},"_5-running-deepseek-r1-model-via-ollama","5. Running DeepSeek R1 Model via Ollama",[73,23368,23369,23370,23373],{},"Download and install Ollama from ",[77,23371,23203],{"href":23203,"rel":23372},[81],". Then, open two terminal windows:",[665,23375,23376],{},[148,23377,23378,23381],{},[169,23379,23380],{},"First Terminal"," — run the Ollama server:",[3418,23383,23385],{"className":6347,"code":23384,"language":6349,"meta":23,"style":23},"ollama serve\n",[3061,23386,23387],{"__ignoreMap":23},[1291,23388,23389,23392],{"class":3427,"line":3428},[1291,23390,23391],{"class":6356},"ollama",[1291,23393,23394],{"class":3439}," serve\n",[665,23396,23397],{"start":24},[148,23398,23399,23402],{},[169,23400,23401],{},"Second Terminal"," — pull and run DeepSeek R1:",[3418,23404,23406],{"className":6347,"code":23405,"language":6349,"meta":23,"style":23},"ollama run deepseek-r1:1.5b\n",[3061,23407,23408],{"__ignoreMap":23},[1291,23409,23410,23412,23414],{"class":3427,"line":3428},[1291,23411,23391],{"class":6356},[1291,23413,17106],{"class":3439},[1291,23415,23416],{"class":3439}," deepseek-r1:1.5b\n",[9194,23418,23419],{},[73,23420,23421,23422,23425],{},"Note that ollama serves models with 2k context length by default, this may cause low quality responses. To change the default context length, run the following: ",[3061,23423,23424],{},"\u002Fset parameter num_ctx 8192",". 
You may set the number up to 128K; however, the model should perform best with a max context length lower than 32K.",[73,23427,23428,23431,23432,23435,23436,23439],{},[15804,23429,23430],{},"Pro Tip:"," If you want to try a bigger DeepSeek R1 variant—such as ",[3061,23433,23434],{},"deepseek-r1:7b","—simply replace the ",[3061,23437,23438],{},":1.5b"," tag above. Note that larger models often require additional system resources (RAM\u002FGPU), so pick the size that fits your hardware.",[73,23441,23442,23443,23446],{},"Ollama will serve on ",[3061,23444,23445],{},"http:\u002F\u002Flocalhost:11434",". To verify your setup, you can open a third terminal and test it by sending a POST request to that endpoint:",[3418,23448,23450],{"className":6347,"code":23449,"language":6349,"meta":23,"style":23},"curl -X POST http:\u002F\u002Flocalhost:11434\u002Fapi\u002Fgenerate -d '{\"model\":\"deepseek-r1:1.5b\",\"prompt\":\"Hello\"}'\n",[3061,23451,23452],{"__ignoreMap":23},[1291,23453,23454,23456,23458,23461,23464,23467,23469,23472],{"class":3427,"line":3428},[1291,23455,17507],{"class":6356},[1291,23457,22126],{"class":3439},[1291,23459,23460],{"class":3439}," POST",[1291,23462,23463],{"class":3439}," http:\u002F\u002Flocalhost:11434\u002Fapi\u002Fgenerate",[1291,23465,23466],{"class":3439}," -d",[1291,23468,6415],{"class":3435},[1291,23470,23471],{"class":3439},"{\"model\":\"deepseek-r1:1.5b\",\"prompt\":\"Hello\"}",[1291,23473,5188],{"class":3435},[73,23475,23476],{},"If it responds with a greeting, your local LLM is working correctly.",[140,23478,23480],{"id":23479},"_6-building-running-the-rag-app","6. 
Building & Running the RAG App",[73,23482,23483],{},"If you are on Windows, please refer to the “Option A: Run in Docker” section below.",[3189,23485,23487],{"id":23486},"option-a-run-in-docker","Option A: Run in Docker",[665,23489,23490],{},[148,23491,23492],{},"Build the Docker image from the private_rag folder (where the Dockerfile resides):",[3418,23494,23496],{"className":6347,"code":23495,"language":6349,"meta":23,"style":23},"docker build -t privaterag .\n",[3061,23497,23498],{"__ignoreMap":23},[1291,23499,23500,23502,23504,23506,23509],{"class":3427,"line":3428},[1291,23501,7331],{"class":6356},[1291,23503,22001],{"class":3439},[1291,23505,22004],{"class":3439},[1291,23507,23508],{"class":3439}," privaterag",[1291,23510,22010],{"class":3439},[665,23512,23513],{"start":24},[148,23514,23515,23516,23518],{},"Run the container, mounting your local ",[3061,23517,3935],{}," folder (so it can index your documents):",[3418,23520,23522],{"className":6347,"code":23521,"language":6349,"meta":23,"style":23},"docker run -v .\u002Fdata:\u002Fapp\u002Fdata -p 8000:8000 privaterag\n",[3061,23523,23524],{"__ignoreMap":23},[1291,23525,23526,23528,23530,23532,23535,23537,23539],{"class":3427,"line":3428},[1291,23527,7331],{"class":6356},[1291,23529,17106],{"class":3439},[1291,23531,22037],{"class":3439},[1291,23533,23534],{"class":3439}," .\u002Fdata:\u002Fapp\u002Fdata",[1291,23536,6412],{"class":3439},[1291,23538,22049],{"class":3439},[1291,23540,23541],{"class":3439}," privaterag\n",[665,23543,23544],{"start":675},[148,23545,23546,23547,23549,23550,19244],{},"The app will start on port ",[3061,23548,4939],{}," (i.e., ",[3061,23551,23552],{},"http:\u002F\u002F0.0.0.0:8000",[3189,23554,23556],{"id":23555},"option-b-run-locally-no-docker","Option B: Run Locally (No Docker)",[665,23558,23559],{},[148,23560,23561,23564],{},[169,23562,23563],{},"Install"," 
dependencies:",[3418,23566,23567],{"className":6347,"code":8524,"language":6349,"meta":23,"style":23},[3061,23568,23569],{"__ignoreMap":23},[1291,23570,23571,23573,23575,23577],{"class":3427,"line":3428},[1291,23572,6357],{"class":6356},[1291,23574,6360],{"class":3439},[1291,23576,8535],{"class":3439},[1291,23578,8538],{"class":3439},[665,23580,23581],{"start":24},[148,23582,23583,23586],{},[169,23584,23585],{},"Launch"," the pipeline:",[3418,23588,23589],{"className":6347,"code":8544,"language":6349,"meta":23,"style":23},[3061,23590,23591],{"__ignoreMap":23},[1291,23592,23593,23595],{"class":3427,"line":3428},[1291,23594,3422],{"class":6356},[1291,23596,8553],{"class":3439},[665,23598,23599],{"start":675},[148,23600,23601,23602,694],{},"The REST endpoint defaults to ",[3061,23603,22277],{},[140,23605,23607],{"id":23606},"_7-querying-the-pipeline","7. Querying the Pipeline",[73,23609,23610],{},"We’ve included a sample PDF file under data\u002F named:",[73,23612,23613],{},"IdeanomicsInc_20160330_10-K_EX-10.26_9512211_EX-10.26_Content License Agreement.pdf",[73,23615,23616],{},"It’s a content license agreement, which you’ll use as the knowledge base for demonstration.",[73,23618,23619],{},"Send a POST request to the pipeline to see how the framework retrieves from this PDF and generates an answer with DeepSeek R1:",[3418,23621,23623],{"className":6347,"code":23622,"language":6349,"meta":23,"style":23},"curl -X 'POST' \\\n  'http:\u002F\u002F0.0.0.0:8000\u002Fv2\u002Fanswer' \\\n  -H 'accept: *\u002F*' \\\n  -H 'Content-Type: application\u002Fjson' \\\n  -d '{\n    \"prompt\": \"What are the terms and conditions of the contract?\"\n  
}'\n",[3061,23624,23625,23639,23649,23661,23673,23681,23686],{"__ignoreMap":23},[1291,23626,23627,23629,23631,23633,23635,23637],{"class":3427,"line":3428},[1291,23628,17507],{"class":6356},[1291,23630,22126],{"class":3439},[1291,23632,6415],{"class":3435},[1291,23634,22131],{"class":3439},[1291,23636,3436],{"class":3435},[1291,23638,22136],{"class":3431},[1291,23640,23641,23643,23645,23647],{"class":3427,"line":24},[1291,23642,22141],{"class":3435},[1291,23644,22277],{"class":3439},[1291,23646,3436],{"class":3435},[1291,23648,22136],{"class":3431},[1291,23650,23651,23653,23655,23657,23659],{"class":3427,"line":675},[1291,23652,22153],{"class":3439},[1291,23654,6415],{"class":3435},[1291,23656,22158],{"class":3439},[1291,23658,3436],{"class":3435},[1291,23660,22136],{"class":3431},[1291,23662,23663,23665,23667,23669,23671],{"class":3427,"line":3542},[1291,23664,22153],{"class":3439},[1291,23666,6415],{"class":3435},[1291,23668,22171],{"class":3439},[1291,23670,3436],{"class":3435},[1291,23672,22136],{"class":3431},[1291,23674,23675,23677,23679],{"class":3427,"line":3547},[1291,23676,22310],{"class":3439},[1291,23678,6415],{"class":3435},[1291,23680,22315],{"class":3439},[1291,23682,23683],{"class":3427,"line":3572},[1291,23684,23685],{"class":3439},"    \"prompt\": \"What are the terms and conditions of the contract?\"\n",[1291,23687,23688,23691],{"class":3427,"line":3614},[1291,23689,23690],{"class":3439},"  }",[1291,23692,5188],{"class":3435},[73,23694,23695,23696,23698],{},"Pathway’s vector store will retrieve the relevant documents from the ",[3061,23697,5185],{}," folder, pass them as context to DeepSeek R1, and return an answer—fully on-premise, with no external calls.",[140,23700,23702],{"id":23701},"_8-conclusion-why-pathway-live-data-framework-for-private-rag","8. 
Conclusion: Why Pathway Live Data Framework for Private RAG?",[73,23704,23705,23706,14364,23708,14368,23710,4390],{},"By combining ",[169,23707,23209],{},[169,23709,23062],{},[169,23711,1279],{},[145,23713,23714,23720,23729],{},[148,23715,23716,23719],{},[169,23717,23718],{},"100% On-Prem Deployment",": No third-party calls; data never leaves your local environment.",[148,23721,23722,23725,23726,23728],{},[169,23723,23724],{},"Real-time updates",": If you add or change documents in the ",[3061,23727,5185],{}," folder (or connected sources like SharePoint\u002FGoogle Drive), the framework can incrementally re-index them—keeping your knowledge base fresh.",[148,23730,23731,23734],{},[169,23732,23733],{},"Flexible & Scalable",": Because Pathway Live Data Framework orchestrates everything in a unified pipeline, you can easily swap in new LLMs, embedder models, indexing strategy or data connectors by customizing the YAML file.",[73,23736,23737],{},"This setup is ideal for organizations dealing with confidential or regulated content, or anyone who wants full control over their LLM environment. With local inference, you have a fully private LLM-based solution with predictable performance and real-time updates.",[73,23739,23740,23741,694],{},"If you’d like a deeper dive into adaptive retrieval techniques or see another example of Private RAG, check out our previous ",[77,23742,23744],{"href":23743},"\u002Fdevelopers\u002Ftemplates\u002Frag\u002Ftemplate-private-rag#customizing-the-pipeline","Private Adaptive RAG guide",[140,23746,20822],{"id":20821},[73,23748,20825],{},[22360,23750],{},[73,23752,23753,23754,23758,23759,23761,23762,23766],{},"If you found this guide helpful, be sure to check out the ",[77,23755,23757],{"href":20774,"rel":23756},[81],"full GitHub repository"," for more examples, or drop by the ",[77,23760,13171],{"href":692}," to explore advanced connectors, dynamic pipelines, and more. 
We’d love to hear your feedback—join the ",[77,23763,23765],{"href":7296,"rel":23764},[81],"Pathway Discord community"," or open an issue on GitHub.",[73,23768,23769],{},[169,23770,23771],{},"Happy experimenting with your fully private RAG pipeline!",[5019,23773,23774],{},"html pre.shiki code .s5Dmg, html code.shiki .s5Dmg{--shiki-default:#FFCB6B}html pre.shiki code .sfyAc, html code.shiki .sfyAc{--shiki-default:#C3E88D}html pre.shiki code .sdLwU, html code.shiki .sdLwU{--shiki-default:#82AAFF}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html pre.shiki code .s-wAU, html code.shiki .s-wAU{--shiki-default:#F07178}html pre.shiki code .sAklC, html code.shiki .sAklC{--shiki-default:#89DDFF}html pre.shiki code .saEQR, html code.shiki .saEQR{--shiki-default:#676E95;--shiki-default-font-style:italic}html pre.shiki code .sJ14y, html code.shiki .sJ14y{--shiki-default:#C792EA}html pre.shiki code .sx098, html code.shiki .sx098{--shiki-default:#F78C6C}html pre.shiki code .s0W1g, html code.shiki .s0W1g{--shiki-default:#BABED8}",{"title":23,"searchDepth":24,"depth":24,"links":23776},[23777,23778,23779,23780,23782,23783,23787,23788,23789],{"id":23066,"depth":24,"text":23067},{"id":23122,"depth":24,"text":23123},{"id":23183,"depth":24,"text":23184},{"id":23213,"depth":24,"text":23781},"4. 
Modifying the app.yaml",{"id":23365,"depth":24,"text":23366},{"id":23479,"depth":24,"text":23480,"children":23784},[23785,23786],{"id":23486,"depth":675,"text":23487},{"id":23555,"depth":675,"text":23556},{"id":23606,"depth":24,"text":23607},{"id":23701,"depth":24,"text":23702},{"id":20821,"depth":24,"text":20822},"Build and deploy a Private RAG pipeline powered by DeepSeek R1 —an open-source LLM with strong reasoning capabilities.",{"single":35,"aside":34,"layout":90,"date":23792,"thumbnail":23793,"tags":23795,"hidden":35},"2025-02-05",{"src":23794,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Freal-time-rag-pipeline-th.png",[90,17824,6268],"\u002Fframework\u002Fblog\u002Fdeepseek-ollama",{"title":23042,"description":23790},{"loc":23796},"framework\u002Fblog\u002F880.deepseek-ollama","2Z6IgghTRoLwccTlVyrSb7Hx2Lkb7VZ7nTh4993hyEc",{"id":23802,"title":23803,"author":23804,"body":23805,"description":23875,"extension":27,"meta":23876,"navigation":35,"path":23881,"seo":23882,"sitemap":23883,"stem":23884,"__hash__":23885},"content\u002Fframework\u002Fblog\u002F881.deploy-rag-agent-tools-with-pathway.md","Power and Deploy RAG Agent Tools with Pathway",{"id":7342,"url":7343,"name":7344,"description":7345,"img":7346,"provider":11,"linkedin":7347},{"type":13,"value":23806,"toc":23869},[23807,23810,23813,23817,23820,23824,23827,23831,23841,23848,23850,23852,23854,23861,23865],[68,23808,23803],{"id":23809},"power-and-deploy-rag-agent-tools-with-pathway",[73,23811,23812],{},"Enterprise AI in 2025 is evolving from proof of concept to live production deployments, and organizations need fast, up-to-date retrieval of information for their language models—alongside the flexibility to run advanced, multi-step agent logic. 
That’s where Pathway Live Data Framework comes in.",[140,23814,23816],{"id":23815},"pathway-live-data-framework-for-real-time-indexing","Pathway Live Data Framework for Real-Time Indexing",[73,23818,23819],{},"The Pathway Live Data Framework provides a real-time, incremental indexing engine that continuously syncs with your data sources (files, databases, or APIs), ensuring your agents always have access to the freshest information. By combining BM25 and semantic search, it delivers a hybrid retrieval approach that significantly boosts accuracy and recall.",[140,23821,23823],{"id":23822},"agent-orchestration-and-deployment-made-easy","Agent Orchestration and Deployment Made Easy",[73,23825,23826],{},"If your workflows require sophisticated logic—like query rewriting, relevance checks, or hallucination detection—Pathway Live Data Framework integrates seamlessly with agent orchestration tools such as LangGraph, Crew AI, AutoGen, or OpenAI Swarm. This approach lets you filter out irrelevant documents, refine user queries, generate well-grounded answers, and keep an ongoing watch for hallucinations—all within a real-time pipeline that automatically adapts to your data updates. 
The framework also makes it simple to expose custom agent logic via a REST endpoint, reducing overhead whether you’re building a specialized LLM application or an internal knowledge assistant.",[140,23828,23830],{"id":23829},"get-started","Get Started",[73,23832,23833,23834],{},"Explore the step-by-step cookbooks demonstrating how to combine the framework’s real-time indexing with LangGraph multi-step agent flows:\n",[1291,23835,23837],{"className":23836},[2912],[77,23838,23839],{"href":23839,"rel":23840},"https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fllm-app\u002Fblob\u002Fmain\u002Fcookbooks\u002Fself-rag-agents\u002Fpathway_deploy_langgraph_agents.ipynb",[81],[73,23842,23843,23844],{},"If you are only interested in using Pathway Live Data Framework as an always up-to-date document store and want to deploy your agents your own way (via Flask, FastAPI, etc.), then check out this cookbook: ",[77,23845,23846],{"href":23846,"rel":23847},"https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fllm-app\u002Fblob\u002Fmain\u002Fcookbooks\u002Fself-rag-agents\u002Fpathway_langgraph_agentic_rag.ipynb",[81],[140,23849,20822],{"id":20821},[73,23851,20825],{},[22360,23853],{},[73,23855,23856,23857],{},"Check out ready-to-run app templates for RAG, AI pipelines, and enterprise search with live data: ",[77,23858,23860],{"href":15745,"rel":23859},[81],"pathwaycom\u002Fllm-app",[73,23862,20833,23863,8876],{},[3061,23864,8875],{},[73,23866,20839,23867,9175],{},[77,23868,20843],{"href":20842},{"title":23,"searchDepth":24,"depth":24,"links":23870},[23871,23872,23873,23874],{"id":23815,"depth":24,"text":23816},{"id":23822,"depth":24,"text":23823},{"id":23829,"depth":24,"text":23830},{"id":20821,"depth":24,"text":20822},"Enterprise AI in 2025 is evolving from proof of concept to live production deployments, and organizations need fast, up-to-date retrieval of information for their language models—alongside the flexibility to run advanced, multi-step agent logic. 
That’s where Pathway Live Data Framework comes in",{"layout":90,"date":23877,"thumbnail":23878,"tags":23880,"hidden":35},"2025-01-16",{"src":23879,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fpower-and-deploy-th.png",[90,6268],"\u002Fframework\u002Fblog\u002Fdeploy-rag-agent-tools-with-pathway",{"title":23803,"description":23875},{"loc":23881},"framework\u002Fblog\u002F881.deploy-rag-agent-tools-with-pathway","fJoxE-5Rsd9gJbc9eHq2u2PBzzKNgdC-CnW6bJlL6QU",{"id":23887,"title":23888,"author":23889,"body":23894,"description":29490,"extension":27,"meta":29491,"navigation":35,"path":29500,"seo":29501,"sitemap":29502,"stem":29503,"__hash__":29504},"content\u002Fframework\u002Fblog\u002F881.how-text-embeddings-help-suggest-similar-words.md","How Text Embeddings help suggest similar words",{"name":23890,"description":23891,"img":23892,"provider":11,"linkedin":23893},"Sajjad Nakhwa","Student at Indian Institute of Technology, Bombay","\u002Fassets\u002Fblog\u002Favatars\u002Fsajjad-avatar.png","https:\u002F\u002Fwww.linkedin.com\u002Fin\u002Fsajjad-nakhwa-801707285\u002F",{"type":13,"value":23895,"toc":29455},[23896,23899,23903,23913,23917,23920,23923,23928,23931,23935,23945,23949,23952,23978,23981,23985,23999,24006,24017,24020,24037,24536,24539,25103,25106,25110,25117,25128,25131,25614,25617,26144,26653,26656,26667,26849,26852,26856,26860,26864,26867,26874,26877,26880,26883,26891,26894,26898,26901,26905,26908,26927,26930,26934,26947,26950,26954,26958,27130,27133,27153,27158,27269,27273,27277,27286,27290,27294,27297,27370,27374,27383,27387,27733,27736,27762,27767,27771,27775,27778,27782,27786,27796,27800,27807,27810,27825,27866,28047,28051,28062,28073,28077,28085,28088,28091,28095,28098,28117,28120,28138,28240,28244,28267,28271,28342,28856,28860,28874,29003,29007,29031,29091,29116,29120,29154,29174,29189,29213,29217,29221,29235,29239,29265,29267,29273,29276,29309,29312,29395,29398,29402,29432,29436,29450,29452],[73,23897,23898],{},"The world is filled with 
fascinating technology. It can feel overwhelming to see such extravagant machines and systems at work, yet it is easy to overlook the intricate engineering powering our most routine tasks. Consider, for example, the smartphone you rely on every day. We often use it mindlessly—scrolling through social media, checking emails, or chatting with friends—without appreciating the sophisticated processes working behind the scenes.\nAmong the most transformative technologies embedded in smartphones are Natural Language Processing (NLP) and Machine Learning (ML). These technologies enable personal assistants like Amazon Alexa and translation services like Google Translate, enhance GPS navigation apps, filter out spam emails, and even assist with auto-correction as we write. They are seamlessly integrated into our daily lives, often hidden in plain sight.\nSignificant advancements in the field have been achieved thanks to text embeddings, a technique that tackles key challenges in representing words, sentences, and documents in machine-readable formats. Text embeddings enable higher accuracy and efficiency in NLP tasks and have become a foundation for many modern applications.\nThis blog post delves deeper into how Text embeddings help suggest similar words, exploring various models and techniques that have revolutionized our understanding of language.",[140,23900,23902],{"id":23901},"what-are-text-embeddings","What are Text Embeddings?",[73,23904,23905,23906,23908,23909,23912],{},"Text embeddings are a way to represent words or textual documents as high-dimensional mathematical vectors, enabling computers—traditionally “dumb” in understanding language—to process text more effectively. They transform words into numerical vectors which capture their meaning based on their ",[169,23907,6097],{},". In this way, words that share similar meanings, context, or analogies are placed close together in the ",[169,23910,23911],{},"embedding space",". 
Here’s a highly simplified example of how the words are represented and what counts as being close to each other.",[1141,23914],{":zoomable":1143,"alt":23,"className":23915,"sizes":16088,"src":23916},[23361],"\u002Fassets\u002Fcontent\u002Fblog\u002Ftext-embeddings-article\u002Fimage-1.png",[73,23918,23919],{},"Here, the words “king” and “queen” are placed close together in the space, whereas “bartender” lies far away from them, indicating no strong connection. While illustrations often show these embeddings in two dimensions, real-world embeddings commonly have 100 or more dimensions. Such high-dimensional spaces allow each vector component to represent distinct features of meaning.",[73,23921,23922],{},"Another key use of these embeddings is handling polysemous words: words that are written or pronounced the same but have different meanings depending on the context. For example, consider these two sentences:",[73,23924,23925],{},[15804,23926,23927],{},"“I ate an apple.” vs. “Apple released a new phone this year.”",[73,23929,23930],{},"Here, the word “apple” is used in two different contexts: once as a fruit and once as a company. Text embeddings model these nuances, enabling more contextually accurate suggestions.",[140,23932,23934],{"id":23933},"understanding-languages-a-challenge-for-eons","Understanding Languages - A Challenge for Eons",[73,23936,23937,23938,3126,23941,23944],{},"Understanding languages has always been a challenge. English itself is a very complex language—full of exceptions, contradictions, and rules that even we humans struggle to follow (after all, ",[169,23939,23940],{},"fish",[169,23942,23943],{},"ghoti"," can sound the same if you’re creative enough!). So, imagine how much more difficult it is for computers to make sense of human language and all its complexity. Thankfully, innovation and hard work throughout the ages have brought us several techniques to make things work out, and that is exactly what you will be exploring throughout this blog post. 
You will be looking at several models that help to transform textual data into different kinds of numerical representations because, after all, that is what our computers understand. You will also see how we can use this numerical data for many different use cases, including suggesting similar words!",[140,23946,23948],{"id":23947},"overview-of-the-models","Overview of the Models",[73,23950,23951],{},"Several models are employed to represent and process textual data. Some of the most well-known include:",[145,23953,23954,23960,23966,23972],{},[148,23955,23956,23959],{},[169,23957,23958],{},"Bag of Words",": It is the simplest form of text representation in numbers. Words are vectorized based on their count in the document or sample.",[148,23961,23962,23965],{},[169,23963,23964],{},"TF-IDF",": This algorithm statistically measures how relevant a word is to a document within a set of documents.",[148,23967,23968,23971],{},[169,23969,23970],{},"Word2Vec",": Words are vectorized, and these vectors capture information about the word's meaning based on the surrounding words. The word2vec algorithm estimates these representations by modeling text in a large corpus.",[148,23973,23974,23977],{},[169,23975,23976],{},"GloVe",": GloVe (Global Vectors) is a model for distributed word representation in which words are mapped into a meaningful space where the distance between words is related to semantic similarity.",[73,23979,23980],{},"The following sections provide a deeper look into these models, along with code snippets and examples.",[140,23982,23984],{"id":23983},"bag-of-words","Bag Of Words",[73,23986,23987,23988,23991,23992,23995,694],{},"The Bag-Of-Words model is a kind of representation that ignores word ordering and context but focuses on word ",[169,23989,23990],{},"multiplicity",". Although simplistic, it is useful for problems where word counts alone are informative features. 
The earliest reference to this model dates back to 1954, in an article by linguist ",[169,23993,23994],{},"Zellig Harris",[23996,23997,23998],"sup",{},"[1]",[73,24000,24001,24002,24005],{},"In the popular Bag of Words model, you ",[169,24003,24004],{},"vectorize words"," based on their count in the document or sample. Here’s how you can build a BoW representation:",[145,24007,24008,24011,24014],{},[148,24009,24010],{},"You remove punctuation and convert the text to lowercase.",[148,24012,24013],{},"Then you eliminate the stopwords (words that are not meaningful for the suggestion, e.g., “and”, “or”, “the”).",[148,24015,24016],{},"After this, you create the count vector using a library such as scikit-learn, and then apply your models.",[73,24018,24019],{},"Now let’s see this in action. The corpus is a small set of online shopping search queries (laptops, smartphones, and books), and the code below does the following:",[665,24021,24022,24025,24028,24031,24034],{},[148,24023,24024],{},"Data Preparation: The code starts by importing the necessary libraries. It then defines a list of sample search queries related to online shopping.",[148,24026,24027],{},"Text Preprocessing: Each query is converted to lowercase, split into individual words, and then cleaned of stopwords. Finally, it’s joined back into a processed string and appended to the corpus list.",[148,24029,24030],{},"Vectorization with Bag of Words: The CountVectorizer() from scikit-learn transforms the cleaned text into a numerical matrix where each column represents a word, and each row represents a document (query). The values are the counts of how often each word appears.",[148,24032,24033],{},"DataFrame Creation: This matrix is converted into a pandas DataFrame, making it easier to read and interpret. 
The DataFrame’s columns are the words, and each row corresponds to a processed search query.",[148,24035,24036],{},"Output: Finally, the code prints the resulting table, providing a clear, human-readable representation of the Bag of Words features derived from the text.",[3418,24038,24040],{"className":3420,"code":24039,"language":3422,"meta":23,"style":23},"import pandas as pd\nfrom sklearn.feature_extraction.text import CountVectorizer \nfrom nltk.corpus import stopwords \nimport nltk \nsearch_queries = [ \"buy a laptop online\",\"cheap laptops for sale\",\"best gaming laptop\",\"online shopping for electronics\", \"buy smartphone online\",\"cheap mobile phones\",\"best budget smartphone\",\"latest smartphone models\", \"buy books online\",\"top books to read\",\"best fantasy books\" ]\n\ncorpus = [] #processing the corpus \nfor query in search_queries: \n    review = query.lower() \n    review = review.split() \n    review = [word for word in review if word not in set(sw)] \n    review = ' '.join(review) \n    corpus.append(review) \n##vectorization of BOW \nvectorizer_bow = CountVectorizer() \nX_bow = vectorizer_bow.fit_transform(corpus) \nX_bow_dense = X_bow.toarray() \ndf_bow=\npd.DataFrame(X_bow_dense,columns=vectorizer_bow.get_feature_names_out()) df_bow.insert(0, 'Index', range(len(corpus))) \nprint(\"Bag of Words (BOW) Table:\") \nprint(df_bow.to_string(index=False))\n",[3061,24041,24042,24052,24074,24091,24098,24207,24211,24223,24240,24259,24276,24319,24342,24359,24364,24378,24402,24421,24429,24500,24517],{"__ignoreMap":23},[1291,24043,24044,24046,24048,24050],{"class":3427,"line":3428},[1291,24045,3476],{"class":3475},[1291,24047,9314],{"class":3431},[1291,24049,3536],{"class":3475},[1291,24051,9319],{"class":3431},[1291,24053,24054,24056,24059,24061,24064,24066,24069,24071],{"class":3427,"line":24},[1291,24055,3550],{"class":3475},[1291,24057,24058],{"class":3431}," 
sklearn",[1291,24060,694],{"class":3435},[1291,24062,24063],{"class":3431},"feature_extraction",[1291,24065,694],{"class":3435},[1291,24067,24068],{"class":3431},"text ",[1291,24070,3476],{"class":3475},[1291,24072,24073],{"class":3431}," CountVectorizer \n",[1291,24075,24076,24078,24081,24083,24086,24088],{"class":3427,"line":675},[1291,24077,3550],{"class":3475},[1291,24079,24080],{"class":3431}," nltk",[1291,24082,694],{"class":3435},[1291,24084,24085],{"class":3431},"corpus ",[1291,24087,3476],{"class":3475},[1291,24089,24090],{"class":3431}," stopwords \n",[1291,24092,24093,24095],{"class":3427,"line":3542},[1291,24094,3476],{"class":3475},[1291,24096,24097],{"class":3431}," nltk \n",[1291,24099,24100,24103,24105,24107,24109,24112,24114,24116,24118,24121,24123,24125,24127,24130,24132,24134,24136,24139,24141,24143,24145,24148,24150,24152,24154,24157,24159,24161,24163,24166,24168,24170,24172,24175,24177,24179,24181,24184,24186,24188,24190,24193,24195,24197,24199,24202,24204],{"class":3427,"line":3547},[1291,24101,24102],{"class":3431},"search_queries ",[1291,24104,3738],{"class":3435},[1291,24106,4145],{"class":3435},[1291,24108,3705],{"class":3435},[1291,24110,24111],{"class":3439},"buy a laptop online",[1291,24113,3691],{"class":3435},[1291,24115,3566],{"class":3435},[1291,24117,3691],{"class":3435},[1291,24119,24120],{"class":3439},"cheap laptops for sale",[1291,24122,3691],{"class":3435},[1291,24124,3566],{"class":3435},[1291,24126,3691],{"class":3435},[1291,24128,24129],{"class":3439},"best gaming laptop",[1291,24131,3691],{"class":3435},[1291,24133,3566],{"class":3435},[1291,24135,3691],{"class":3435},[1291,24137,24138],{"class":3439},"online shopping for electronics",[1291,24140,3691],{"class":3435},[1291,24142,3566],{"class":3435},[1291,24144,3705],{"class":3435},[1291,24146,24147],{"class":3439},"buy smartphone 
online",[1291,24149,3691],{"class":3435},[1291,24151,3566],{"class":3435},[1291,24153,3691],{"class":3435},[1291,24155,24156],{"class":3439},"cheap mobile phones",[1291,24158,3691],{"class":3435},[1291,24160,3566],{"class":3435},[1291,24162,3691],{"class":3435},[1291,24164,24165],{"class":3439},"best budget smartphone",[1291,24167,3691],{"class":3435},[1291,24169,3566],{"class":3435},[1291,24171,3691],{"class":3435},[1291,24173,24174],{"class":3439},"latest smartphone models",[1291,24176,3691],{"class":3435},[1291,24178,3566],{"class":3435},[1291,24180,3705],{"class":3435},[1291,24182,24183],{"class":3439},"buy books online",[1291,24185,3691],{"class":3435},[1291,24187,3566],{"class":3435},[1291,24189,3691],{"class":3435},[1291,24191,24192],{"class":3439},"top books to read",[1291,24194,3691],{"class":3435},[1291,24196,3566],{"class":3435},[1291,24198,3691],{"class":3435},[1291,24200,24201],{"class":3439},"best fantasy books",[1291,24203,3691],{"class":3435},[1291,24205,24206],{"class":3435}," ]\n",[1291,24208,24209],{"class":3427,"line":3572},[1291,24210,3526],{"emptyLinePlaceholder":35},[1291,24212,24213,24215,24217,24220],{"class":3427,"line":3614},[1291,24214,24085],{"class":3431},[1291,24216,3738],{"class":3435},[1291,24218,24219],{"class":3435}," []",[1291,24221,24222],{"class":3673}," #processing the corpus \n",[1291,24224,24225,24228,24231,24233,24236,24238],{"class":3427,"line":3640},[1291,24226,24227],{"class":3475},"for",[1291,24229,24230],{"class":3431}," query ",[1291,24232,9566],{"class":3475},[1291,24234,24235],{"class":3431}," search_queries",[1291,24237,4390],{"class":3435},[1291,24239,7743],{"class":3431},[1291,24241,24242,24245,24247,24250,24252,24255,24257],{"class":3427,"line":3665},[1291,24243,24244],{"class":3431},"    review ",[1291,24246,3738],{"class":3435},[1291,24248,24249],{"class":3431}," 
query",[1291,24251,694],{"class":3435},[1291,24253,24254],{"class":3812},"lower",[1291,24256,12394],{"class":3435},[1291,24258,7743],{"class":3431},[1291,24260,24261,24263,24265,24268,24270,24272,24274],{"class":3427,"line":3670},[1291,24262,24244],{"class":3431},[1291,24264,3738],{"class":3435},[1291,24266,24267],{"class":3431}," review",[1291,24269,694],{"class":3435},[1291,24271,9550],{"class":3812},[1291,24273,12394],{"class":3435},[1291,24275,7743],{"class":3431},[1291,24277,24278,24280,24282,24284,24287,24289,24292,24294,24297,24299,24301,24304,24306,24309,24311,24314,24317],{"class":3427,"line":3677},[1291,24279,24244],{"class":3431},[1291,24281,3738],{"class":3435},[1291,24283,4145],{"class":3435},[1291,24285,24286],{"class":3431},"word ",[1291,24288,24227],{"class":3475},[1291,24290,24291],{"class":3431}," word ",[1291,24293,9566],{"class":3475},[1291,24295,24296],{"class":3431}," review ",[1291,24298,5223],{"class":3475},[1291,24300,24291],{"class":3431},[1291,24302,24303],{"class":3435},"not",[1291,24305,5233],{"class":3435},[1291,24307,24308],{"class":6356}," set",[1291,24310,3816],{"class":3435},[1291,24312,24313],{"class":3812},"sw",[1291,24315,24316],{"class":3435},")]",[1291,24318,7743],{"class":3431},[1291,24320,24321,24323,24325,24327,24329,24331,24333,24335,24338,24340],{"class":3427,"line":3877},[1291,24322,24244],{"class":3431},[1291,24324,3738],{"class":3435},[1291,24326,6415],{"class":3435},[1291,24328,6415],{"class":3435},[1291,24330,694],{"class":3435},[1291,24332,9544],{"class":3812},[1291,24334,3816],{"class":3435},[1291,24336,24337],{"class":3812},"review",[1291,24339,713],{"class":3435},[1291,24341,7743],{"class":3431},[1291,24343,24344,24347,24349,24351,24353,24355,24357],{"class":3427,"line":3916},[1291,24345,24346],{"class":3431},"    
corpus",[1291,24348,694],{"class":3435},[1291,24350,6564],{"class":3812},[1291,24352,3816],{"class":3435},[1291,24354,24337],{"class":3812},[1291,24356,713],{"class":3435},[1291,24358,7743],{"class":3431},[1291,24360,24361],{"class":3427,"line":4519},[1291,24362,24363],{"class":3673},"##vectorization of BOW \n",[1291,24365,24366,24369,24371,24374,24376],{"class":3427,"line":6038},[1291,24367,24368],{"class":3431},"vectorizer_bow ",[1291,24370,3738],{"class":3435},[1291,24372,24373],{"class":3812}," CountVectorizer",[1291,24375,12394],{"class":3435},[1291,24377,7743],{"class":3431},[1291,24379,24380,24383,24385,24388,24390,24393,24395,24398,24400],{"class":3427,"line":6043},[1291,24381,24382],{"class":3431},"X_bow ",[1291,24384,3738],{"class":3435},[1291,24386,24387],{"class":3431}," vectorizer_bow",[1291,24389,694],{"class":3435},[1291,24391,24392],{"class":3812},"fit_transform",[1291,24394,3816],{"class":3435},[1291,24396,24397],{"class":3812},"corpus",[1291,24399,713],{"class":3435},[1291,24401,7743],{"class":3431},[1291,24403,24404,24407,24409,24412,24414,24417,24419],{"class":3427,"line":6066},[1291,24405,24406],{"class":3431},"X_bow_dense ",[1291,24408,3738],{"class":3435},[1291,24410,24411],{"class":3431}," 
X_bow",[1291,24413,694],{"class":3435},[1291,24415,24416],{"class":3812},"toarray",[1291,24418,12394],{"class":3435},[1291,24420,7743],{"class":3431},[1291,24422,24423,24426],{"class":3427,"line":6078},[1291,24424,24425],{"class":3431},"df_bow",[1291,24427,24428],{"class":3435},"=\n",[1291,24430,24431,24433,24435,24437,24439,24442,24444,24447,24449,24452,24454,24457,24460,24463,24465,24468,24470,24472,24474,24476,24479,24481,24483,24486,24488,24491,24493,24495,24498],{"class":3427,"line":6089},[1291,24432,12330],{"class":3431},[1291,24434,694],{"class":3435},[1291,24436,12335],{"class":3812},[1291,24438,3816],{"class":3435},[1291,24440,24441],{"class":3812},"X_bow_dense",[1291,24443,3566],{"class":3435},[1291,24445,24446],{"class":3819},"columns",[1291,24448,3738],{"class":3435},[1291,24450,24451],{"class":3812},"vectorizer_bow",[1291,24453,694],{"class":3435},[1291,24455,24456],{"class":3812},"get_feature_names_out",[1291,24458,24459],{"class":3435},"())",[1291,24461,24462],{"class":3431}," df_bow",[1291,24464,694],{"class":3435},[1291,24466,24467],{"class":3812},"insert",[1291,24469,3816],{"class":3435},[1291,24471,9555],{"class":3451},[1291,24473,3566],{"class":3435},[1291,24475,6415],{"class":3435},[1291,24477,24478],{"class":3439},"Index",[1291,24480,3436],{"class":3435},[1291,24482,3566],{"class":3435},[1291,24484,24485],{"class":3812}," range",[1291,24487,3816],{"class":3435},[1291,24489,24490],{"class":3812},"len",[1291,24492,3816],{"class":3435},[1291,24494,24397],{"class":3812},[1291,24496,24497],{"class":3435},")))",[1291,24499,7743],{"class":3431},[1291,24501,24502,24504,24506,24508,24511,24513,24515],{"class":3427,"line":6124},[1291,24503,4986],{"class":3812},[1291,24505,3816],{"class":3435},[1291,24507,3691],{"class":3435},[1291,24509,24510],{"class":3439},"Bag of Words (BOW) 
Table:",[1291,24512,3691],{"class":3435},[1291,24514,713],{"class":3435},[1291,24516,7743],{"class":3431},[1291,24518,24519,24521,24523,24525,24527,24530,24532,24534],{"class":3427,"line":6133},[1291,24520,4986],{"class":3812},[1291,24522,3816],{"class":3435},[1291,24524,24425],{"class":3812},[1291,24526,694],{"class":3435},[1291,24528,24529],{"class":3812},"to_string",[1291,24531,3816],{"class":3435},[1291,24533,16885],{"class":3819},[1291,24535,11292],{"class":3435},[73,24537,24538],{},"Output of the above code:",[24540,24541,24545],"div",{"className":24542},[24543,24544],"max-w-full","overflow-x-auto",[16104,24546,24547,24612],{},[16107,24548,24549],{},[16110,24550,24551,24553,24556,24559,24562,24565,24568,24571,24574,24577,24580,24583,24586,24589,24592,24595,24598,24600,24603,24606,24609],{},[16113,24552,24478],{},[16113,24554,24555],{},"best",[16113,24557,24558],{},"books",[16113,24560,24561],{},"budget",[16113,24563,24564],{},"buy",[16113,24566,24567],{},"cheap",[16113,24569,24570],{},"electronics",[16113,24572,24573],{},"fantasy",[16113,24575,24576],{},"gaming",[16113,24578,24579],{},"laptop",[16113,24581,24582],{},"laptops",[16113,24584,24585],{},"latest",[16113,24587,24588],{},"mobile",[16113,24590,24591],{},"models",[16113,24593,24594],{},"online",[16113,24596,24597],{},"phones",[16113,24599,4088],{},[16113,24601,24602],{},"sale",[16113,24604,24605],{},"shopping",[16113,24607,24608],{},"smartphone",[16113,24610,24611],{},"top 
",[16162,24613,24614,24659,24703,24747,24791,24836,24881,24925,24970,25014,25059],{},[16110,24615,24616,24618,24620,24622,24624,24627,24629,24631,24633,24635,24637,24639,24641,24643,24645,24647,24649,24651,24653,24655,24657],{},[16167,24617,9555],{},[16167,24619,9555],{},[16167,24621,9555],{},[16167,24623,9555],{},[16167,24625,24626],{},"1",[16167,24628,9555],{},[16167,24630,9555],{},[16167,24632,9555],{},[16167,24634,9555],{},[16167,24636,24626],{},[16167,24638,9555],{},[16167,24640,9555],{},[16167,24642,9555],{},[16167,24644,9555],{},[16167,24646,24626],{},[16167,24648,9555],{},[16167,24650,9555],{},[16167,24652,9555],{},[16167,24654,9555],{},[16167,24656,9555],{},[16167,24658,9555],{},[16110,24660,24661,24663,24665,24667,24669,24671,24673,24675,24677,24679,24681,24683,24685,24687,24689,24691,24693,24695,24697,24699,24701],{},[16167,24662,24626],{},[16167,24664,9555],{},[16167,24666,9555],{},[16167,24668,9555],{},[16167,24670,9555],{},[16167,24672,24626],{},[16167,24674,9555],{},[16167,24676,9555],{},[16167,24678,9555],{},[16167,24680,9555],{},[16167,24682,24626],{},[16167,24684,9555],{},[16167,24686,9555],{},[16167,24688,9555],{},[16167,24690,9555],{},[16167,24692,9555],{},[16167,24694,9555],{},[16167,24696,24626],{},[16167,24698,9555],{},[16167,24700,9555],{},[16167,24702,9555],{},[16110,24704,24705,24707,24709,24711,24713,24715,24717,24719,24721,24723,24725,24727,24729,24731,24733,24735,24737,24739,24741,24743,24745],{},[16167,24706,4690],{},[16167,24708,24626],{},[16167,24710,9555],{},[16167,24712,9555],{},[16167,24714,9555],{},[16167,24716,9555],{},[16167,24718,9555],{},[16167,24720,9555],{},[16167,24722,24626],{},[16167,24724,24626],{},[16167,24726,9555],{},[16167,24728,9555],{},[16167,24730,9555],{},[16167,24732,9555],{},[16167,24734,9555],{},[16167,24736,9555],{},[16167,24738,9555],{},[16167,24740,9555],{},[16167,24742,9555],{},[16167,24744,9555],{},[16167,24746,9555],{},[16110,24748,24749,24751,24753,24755,24757,24759,24761,24763,24765,24767,24769,24771,2
4773,24775,24777,24779,24781,24783,24785,24787,24789],{},[16167,24750,12063],{},[16167,24752,9555],{},[16167,24754,9555],{},[16167,24756,9555],{},[16167,24758,9555],{},[16167,24760,9555],{},[16167,24762,24626],{},[16167,24764,9555],{},[16167,24766,9555],{},[16167,24768,9555],{},[16167,24770,9555],{},[16167,24772,9555],{},[16167,24774,9555],{},[16167,24776,9555],{},[16167,24778,24626],{},[16167,24780,9555],{},[16167,24782,9555],{},[16167,24784,9555],{},[16167,24786,24626],{},[16167,24788,9555],{},[16167,24790,9555],{},[16110,24792,24793,24796,24798,24800,24802,24804,24806,24808,24810,24812,24814,24816,24818,24820,24822,24824,24826,24828,24830,24832,24834],{},[16167,24794,24795],{},"4",[16167,24797,9555],{},[16167,24799,9555],{},[16167,24801,9555],{},[16167,24803,24626],{},[16167,24805,9555],{},[16167,24807,9555],{},[16167,24809,9555],{},[16167,24811,9555],{},[16167,24813,9555],{},[16167,24815,9555],{},[16167,24817,9555],{},[16167,24819,9555],{},[16167,24821,9555],{},[16167,24823,24626],{},[16167,24825,9555],{},[16167,24827,9555],{},[16167,24829,9555],{},[16167,24831,9555],{},[16167,24833,24626],{},[16167,24835,9555],{},[16110,24837,24838,24841,24843,24845,24847,24849,24851,24853,24855,24857,24859,24861,24863,24865,24867,24869,24871,24873,24875,24877,24879],{},[16167,24839,24840],{},"5",[16167,24842,9555],{},[16167,24844,9555],{},[16167,24846,9555],{},[16167,24848,9555],{},[16167,24850,24626],{},[16167,24852,9555],{},[16167,24854,9555],{},[16167,24856,9555],{},[16167,24858,9555],{},[16167,24860,9555],{},[16167,24862,9555],{},[16167,24864,24626],{},[16167,24866,9555],{},[16167,24868,9555],{},[16167,24870,24626],{},[16167,24872,9555],{},[16167,24874,9555],{},[16167,24876,9555],{},[16167,24878,9555],{},[16167,24880,9555],{},[16110,24882,24883,24885,24887,24889,24891,24893,24895,24897,24899,24901,24903,24905,24907,24909,24911,24913,24915,24917,24919,24921,24923],{},[16167,24884,4229],{},[16167,24886,24626],{},[16167,24888,9555],{},[16167,24890,24626],{},[16167,24892,9555]
,{},[16167,24894,9555],{},[16167,24896,9555],{},[16167,24898,9555],{},[16167,24900,9555],{},[16167,24902,9555],{},[16167,24904,9555],{},[16167,24906,9555],{},[16167,24908,9555],{},[16167,24910,9555],{},[16167,24912,9555],{},[16167,24914,9555],{},[16167,24916,9555],{},[16167,24918,9555],{},[16167,24920,9555],{},[16167,24922,24626],{},[16167,24924,9555],{},[16110,24926,24927,24930,24932,24934,24936,24938,24940,24942,24944,24946,24948,24950,24952,24954,24956,24958,24960,24962,24964,24966,24968],{},[16167,24928,24929],{},"7",[16167,24931,9555],{},[16167,24933,9555],{},[16167,24935,9555],{},[16167,24937,9555],{},[16167,24939,9555],{},[16167,24941,9555],{},[16167,24943,9555],{},[16167,24945,9555],{},[16167,24947,9555],{},[16167,24949,9555],{},[16167,24951,24626],{},[16167,24953,9555],{},[16167,24955,24626],{},[16167,24957,9555],{},[16167,24959,9555],{},[16167,24961,9555],{},[16167,24963,9555],{},[16167,24965,9555],{},[16167,24967,24626],{},[16167,24969,9555],{},[16110,24971,24972,24974,24976,24978,24980,24982,24984,24986,24988,24990,24992,24994,24996,24998,25000,25002,25004,25006,25008,25010,25012],{},[16167,24973,11126],{},[16167,24975,9555],{},[16167,24977,24626],{},[16167,24979,9555],{},[16167,24981,24626],{},[16167,24983,9555],{},[16167,24985,9555],{},[16167,24987,9555],{},[16167,24989,9555],{},[16167,24991,9555],{},[16167,24993,9555],{},[16167,24995,9555],{},[16167,24997,9555],{},[16167,24999,9555],{},[16167,25001,24626],{},[16167,25003,9555],{},[16167,25005,9555],{},[16167,25007,9555],{},[16167,25009,9555],{},[16167,25011,9555],{},[16167,25013,9555],{},[16110,25015,25016,25019,25021,25023,25025,25027,25029,25031,25033,25035,25037,25039,25041,25043,25045,25047,25049,25051,25053,25055,25057],{},[16167,25017,25018],{},"9",[16167,25020,9555],{},[16167,25022,24626],{},[16167,25024,9555],{},[16167,25026,9555],{},[16167,25028,9555],{},[16167,25030,9555],{},[16167,25032,9555],{},[16167,25034,9555],{},[16167,25036,9555],{},[16167,25038,9555],{},[16167,25040,9555],{},[16167,2
5042,9555],{},[16167,25044,9555],{},[16167,25046,9555],{},[16167,25048,9555],{},[16167,25050,24626],{},[16167,25052,9555],{},[16167,25054,9555],{},[16167,25056,9555],{},[16167,25058,24626],{},[16110,25060,25061,25063,25065,25067,25069,25071,25073,25075,25077,25079,25081,25083,25085,25087,25089,25091,25093,25095,25097,25099,25101],{},[16167,25062,6769],{},[16167,25064,24626],{},[16167,25066,24626],{},[16167,25068,9555],{},[16167,25070,9555],{},[16167,25072,9555],{},[16167,25074,9555],{},[16167,25076,24626],{},[16167,25078,9555],{},[16167,25080,9555],{},[16167,25082,9555],{},[16167,25084,9555],{},[16167,25086,9555],{},[16167,25088,9555],{},[16167,25090,9555],{},[16167,25092,9555],{},[16167,25094,9555],{},[16167,25096,9555],{},[16167,25098,9555],{},[16167,25100,9555],{},[16167,25102,9555],{},[73,25104,25105],{},"This results in a matrix of word counts, highlighting the importance of various terms across documents. Although simple, BoW can be a stepping stone to more sophisticated models.",[140,25107,25109],{"id":25108},"term-frequency-inverse-document-frequency-tf-idf","Term Frequency - Inverse Document Frequency (TF-IDF)",[73,25111,25112,25113,25116],{},"TF-IDF weighs the importance of words more cleverly than BoW. This algorithm works on the statistical principle of finding the ",[169,25114,25115],{},"relevance of the word"," in a document or a set of documents.",[73,25118,25119,25120,25123,25124,25127],{},"The term frequency (TF) score measures the ",[169,25121,25122],{},"frequency of a word"," occurring in the document while the inverse document frequency (IDF) measures the ",[169,25125,25126],{},"rarity of the words"," in the corpus. 
It is given more mathematical importance as some words rarely occurring in the text still might hold relevant information.",[73,25129,25130],{},"Now let’s see the expressions that are used to calculate the tf-idf score:",[1291,25132,25135],{"className":25133},[25134],"katex-display",[1291,25136,25139,25292],{"className":25137},[25138],"katex",[1291,25140,25143],{"className":25141},[25142],"katex-mathml",[25144,25145,25147],"math",{"xmlns":25146,"display":2912},"http:\u002F\u002Fwww.w3.org\u002F1998\u002FMath\u002FMathML",[25148,25149,25150,25287],"semantics",{},[25151,25152,25153,25156,25158,25162,25165,25168,25182,25184,25187,25189,25192,25195,25198,25200,25202,25205,25208,25210,25212,25215,25228,25231,25234,25236,25239,25241,25243,25245,25247,25250,25253,25255,25257,25259,25261,25263,25265,25267,25269,25271,25273,25275,25277,25279,25281],"mrow",{},[25154,25155,4843],"mi",{},[25154,25157,9643],{},[25159,25160,25161],"mo",{},"−",[25154,25163,25164],{},"i",[25154,25166,25167],{},"d",[25169,25170,25171,25173],"msub",{},[25154,25172,9643],{},[25151,25174,25175,25177,25179],{},[25154,25176,25164],{},[25159,25178,3566],{"separator":1143},[25154,25180,25181],{},"j",[25159,25183,3738],{},[25154,25185,25186],{},"T",[25154,25188,19676],{},[25154,25190,25191],{},"r",[25154,25193,25194],{},"m",[25154,25196,25197],{},"F",[25154,25199,25191],{},[25154,25201,19676],{},[25154,25203,25204],{},"q",[25154,25206,25207],{},"u",[25154,25209,19676],{},[25154,25211,19216],{},[25154,25213,25214],{},"c",[25169,25216,25217,25220],{},[25154,25218,25219],{},"y",[25151,25221,25222,25224,25226],{},[25154,25223,25164],{},[25159,25225,3566],{"separator":1143},[25154,25227,25181],{},[25159,25229,25230],{},"∗",[25154,25232,25233],{},"I",[25154,25235,19216],{},[25154,25237,25238],{},"v",[25154,25240,19676],{},[25154,25242,25191],{},[25154,25244,17520],{},[25154,25246,19676],{},[25154,25248,25249],{},"D",[25154,25251,25252],{},"o",[25154,25254,25214],{},[25154,25256,25207],{},[25154,25258,25194],{},[2
5154,25260,19676],{},[25154,25262,19216],{},[25154,25264,4843],{},[25154,25266,25197],{},[25154,25268,25191],{},[25154,25270,19676],{},[25154,25272,25204],{},[25154,25274,25207],{},[25154,25276,19676],{},[25154,25278,19216],{},[25154,25280,25214],{},[25169,25282,25283,25285],{},[25154,25284,25219],{},[25154,25286,25164],{},[25288,25289,25291],"annotation",{"encoding":25290},"application\u002Fx-tex","tf-idf_{i,j} = Term Frequency_{i,j} * Inverse Document Frequency_i",[1291,25293,25296,25326,25417,25518],{"className":25294,"ariaHidden":1143},[25295],"katex-html",[1291,25297,25300,25305,25310,25314,25319,25323],{"className":25298},[25299],"base",[1291,25301],{"className":25302,"style":25304},[25303],"strut","height:0.8889em;vertical-align:-0.1944em;",[1291,25306,4843],{"className":25307},[25308,25309],"mord","mathnormal",[1291,25311,9643],{"className":25312,"style":25313},[25308,25309],"margin-right:0.10764em;",[1291,25315],{"className":25316,"style":25318},[25317],"mspace","margin-right:0.2222em;",[1291,25320,25161],{"className":25321},[25322],"mbin",[1291,25324],{"className":25325,"style":25318},[25317],[1291,25327,25329,25333,25336,25339,25406,25410,25414],{"className":25328},[25299],[1291,25330],{"className":25331,"style":25332},[25303],"height:0.9805em;vertical-align:-0.2861em;",[1291,25334,25164],{"className":25335},[25308,25309],[1291,25337,25167],{"className":25338},[25308,25309],[1291,25340,25342,25345],{"className":25341},[25308],[1291,25343,9643],{"className":25344,"style":25313},[25308,25309],[1291,25346,25349],{"className":25347},[25348],"msupsub",[1291,25350,25354,25397],{"className":25351},[25352,25353],"vlist-t","vlist-t2",[1291,25355,25358,25392],{"className":25356},[25357],"vlist-r",[1291,25359,25363],{"className":25360,"style":25362},[25361],"vlist","height:0.3117em;",[1291,25364,25366,25371],{"style":25365},"top:-2.55em;margin-left:-0.1076em;margin-right:0.05em;",[1291,25367],{"className":25368,"style":25370},[25369],"pstrut","height:2.7em;",[1291,2
5372,25378],{"className":25373},[25374,25375,25376,25377],"sizing","reset-size6","size3","mtight",[1291,25379,25381,25384,25388],{"className":25380},[25308,25377],[1291,25382,25164],{"className":25383},[25308,25309,25377],[1291,25385,3566],{"className":25386},[25387,25377],"mpunct",[1291,25389,25181],{"className":25390,"style":25391},[25308,25309,25377],"margin-right:0.05724em;",[1291,25393,25396],{"className":25394},[25395],"vlist-s","​",[1291,25398,25400],{"className":25399},[25357],[1291,25401,25404],{"className":25402,"style":25403},[25361],"height:0.2861em;",[1291,25405],{},[1291,25407],{"className":25408,"style":25409},[25317],"margin-right:0.2778em;",[1291,25411,3738],{"className":25412},[25413],"mrel",[1291,25415],{"className":25416,"style":25409},[25317],[1291,25418,25420,25424,25428,25433,25436,25439,25443,25447,25450,25453,25456,25459,25509,25512,25515],{"className":25419},[25299],[1291,25421],{"className":25422,"style":25423},[25303],"height:0.9694em;vertical-align:-0.2861em;",[1291,25425,25186],{"className":25426,"style":25427},[25308,25309],"margin-right:0.13889em;",[1291,25429,25432],{"className":25430,"style":25431},[25308,25309],"margin-right:0.02778em;","er",[1291,25434,25194],{"className":25435},[25308,25309],[1291,25437,25197],{"className":25438,"style":25427},[25308,25309],[1291,25440,25442],{"className":25441},[25308,25309],"re",[1291,25444,25204],{"className":25445,"style":25446},[25308,25309],"margin-right:0.03588em;",[1291,25448,25207],{"className":25449},[25308,25309],[1291,25451,19676],{"className":25452},[25308,25309],[1291,25454,19216],{"className":25455},[25308,25309],[1291,25457,25214],{"className":25458},[25308,25309],[1291,25460,25462,25465],{"className":25461},[25308],[1291,25463,25219],{"className":25464,"style":25446},[25308,25309],[1291,25466,25468],{"className":25467},[25348],[1291,25469,25471,25501],{"className":25470},[25352,25353],[1291,25472,25474,25498],{"className":25473},[25357],[1291,25475,25477],{"className":25476,"styl
e":25362},[25361],[1291,25478,25480,25483],{"style":25479},"top:-2.55em;margin-left:-0.0359em;margin-right:0.05em;",[1291,25481],{"className":25482,"style":25370},[25369],[1291,25484,25486],{"className":25485},[25374,25375,25376,25377],[1291,25487,25489,25492,25495],{"className":25488},[25308,25377],[1291,25490,25164],{"className":25491},[25308,25309,25377],[1291,25493,3566],{"className":25494},[25387,25377],[1291,25496,25181],{"className":25497,"style":25391},[25308,25309,25377],[1291,25499,25396],{"className":25500},[25395],[1291,25502,25504],{"className":25503},[25357],[1291,25505,25507],{"className":25506,"style":25403},[25361],[1291,25508],{},[1291,25510],{"className":25511,"style":25318},[25317],[1291,25513,25230],{"className":25514},[25322],[1291,25516],{"className":25517,"style":25318},[25317],[1291,25519,25521,25525,25529,25532,25535,25539,25542,25545,25548,25551,25555,25558,25561,25564,25567,25570,25573],{"className":25520},[25299],[1291,25522],{"className":25523,"style":25524},[25303],"height:0.8778em;vertical-align:-0.1944em;",[1291,25526,25233],{"className":25527,"style":25528},[25308,25309],"margin-right:0.07847em;",[1291,25530,19216],{"className":25531},[25308,25309],[1291,25533,25238],{"className":25534,"style":25446},[25308,25309],[1291,25536,25538],{"className":25537},[25308,25309],"erseDoc",[1291,25540,25207],{"className":25541},[25308,25309],[1291,25543,25194],{"className":25544},[25308,25309],[1291,25546,19676],{"className":25547},[25308,25309],[1291,25549,19216],{"className":25550},[25308,25309],[1291,25552,25554],{"className":25553,"style":25427},[25308,25309],"tF",[1291,25556,25442],{"className":25557},[25308,25309],[1291,25559,25204],{"className":25560,"style":25446},[25308,25309],[1291,25562,25207],{"className":25563},[25308,25309],[1291,25565,19676],{"className":25566},[25308,25309],[1291,25568,19216],{"className":25569},[25308,25309],[1291,25571,25214],{"className":25572},[25308,25309],[1291,25574,25576,25579],{"className":25575},[25308],
[1291,25577,25219],{"className":25578,"style":25446},[25308,25309],[1291,25580,25582],{"className":25581},[25348],[1291,25583,25585,25605],{"className":25584},[25352,25353],[1291,25586,25588,25602],{"className":25587},[25357],[1291,25589,25591],{"className":25590,"style":25362},[25361],[1291,25592,25593,25596],{"style":25479},[1291,25594],{"className":25595,"style":25370},[25369],[1291,25597,25599],{"className":25598},[25374,25375,25376,25377],[1291,25600,25164],{"className":25601},[25308,25309,25377],[1291,25603,25396],{"className":25604},[25395],[1291,25606,25608],{"className":25607},[25357],[1291,25609,25612],{"className":25610,"style":25611},[25361],"height:0.15em;",[1291,25613],{},[73,25615,25616],{},"where,",[1291,25618,25620],{"className":25619},[25134],[1291,25621,25623,25808],{"className":25622},[25138],[1291,25624,25626],{"className":25625},[25142],[25144,25627,25628],{"xmlns":25146,"display":2912},[25148,25629,25630,25805],{},[25151,25631,25632,25634,25636,25638,25640,25642,25644,25646,25648,25650,25652,25654,25656,25668,25670],{},[25154,25633,25186],{},[25154,25635,19676],{},[25154,25637,25191],{},[25154,25639,25194],{},[25154,25641,25197],{},[25154,25643,25191],{},[25154,25645,19676],{},[25154,25647,25204],{},[25154,25649,25207],{},[25154,25651,19676],{},[25154,25653,19216],{},[25154,25655,25214],{},[25169,25657,25658,25660],{},[25154,25659,25219],{},[25151,25661,25662,25664,25666],{},[25154,25663,25164],{},[25159,25665,3566],{"separator":1143},[25154,25667,25181],{},[25159,25669,3738],{},[25671,25672,25673,25737],"mfrac",{},[25151,25674,25675,25677,25679,25681,25683,25687,25689,25691,25693,25695,25697,25699,25701,25703,25705,25707,25709,25711,25713,25715,25717,25719,25721,25723,25725,25727,25729,25731,25733,25735],{},[25154,25676,25186],{},[25154,25678,19676],{},[25154,25680,25191],{},[25154,25682,25194],{},[25684,25685,25686],"mtext",{},"  
",[25154,25688,25164],{},[25684,25690,25686],{},[25154,25692,9643],{},[25154,25694,25191],{},[25154,25696,19676],{},[25154,25698,25204],{},[25154,25700,25207],{},[25154,25702,19676],{},[25154,25704,19216],{},[25154,25706,25214],{},[25154,25708,25219],{},[25684,25710,25686],{},[25154,25712,25164],{},[25154,25714,19216],{},[25684,25716,25686],{},[25154,25718,25167],{},[25154,25720,25252],{},[25154,25722,25214],{},[25154,25724,25207],{},[25154,25726,25194],{},[25154,25728,19676],{},[25154,25730,19216],{},[25154,25732,4843],{},[25684,25734,25686],{},[25154,25736,25181],{},[25151,25738,25739,25741,25743,25745,25747,25750,25752,25754,25756,25759,25761,25763,25765,25767,25769,25771,25773,25775,25777,25779,25781,25783,25785,25787,25789,25791,25793,25795,25797,25799,25801,25803],{},[25154,25740,25186],{},[25154,25742,25252],{},[25154,25744,4843],{},[25154,25746,77],{},[25154,25748,25749],{},"l",[25684,25751,25686],{},[25154,25753,19216],{},[25154,25755,25252],{},[25154,25757,694],{"mathvariant":25758},"normal",[25684,25760,25686],{},[25154,25762,25252],{},[25154,25764,9643],{},[25684,25766,25686],{},[25154,25768,4843],{},[25154,25770,19676],{},[25154,25772,25191],{},[25154,25774,25194],{},[25154,25776,17520],{},[25684,25778,25686],{},[25154,25780,25164],{},[25154,25782,19216],{},[25684,25784,25686],{},[25154,25786,25167],{},[25154,25788,25252],{},[25154,25790,25214],{},[25154,25792,25207],{},[25154,25794,25194],{},[25154,25796,19676],{},[25154,25798,19216],{},[25154,25800,4843],{},[25684,25802,25686],{},[25154,25804,25181],{},[25288,25806,25807],{"encoding":25290},"Term Frequency_{i,j} = \\dfrac{Term \\; i \\; frequency \\; in \\; document \\; j}{Total \\; no. 
\\; of \\; terms \\; in \\; document \\; j}  ",[1291,25809,25811,25905],{"className":25810,"ariaHidden":1143},[25295],[1291,25812,25814,25817,25820,25823,25826,25829,25832,25835,25838,25841,25844,25847,25896,25899,25902],{"className":25813},[25299],[1291,25815],{"className":25816,"style":25423},[25303],[1291,25818,25186],{"className":25819,"style":25427},[25308,25309],[1291,25821,25432],{"className":25822,"style":25431},[25308,25309],[1291,25824,25194],{"className":25825},[25308,25309],[1291,25827,25197],{"className":25828,"style":25427},[25308,25309],[1291,25830,25442],{"className":25831},[25308,25309],[1291,25833,25204],{"className":25834,"style":25446},[25308,25309],[1291,25836,25207],{"className":25837},[25308,25309],[1291,25839,19676],{"className":25840},[25308,25309],[1291,25842,19216],{"className":25843},[25308,25309],[1291,25845,25214],{"className":25846},[25308,25309],[1291,25848,25850,25853],{"className":25849},[25308],[1291,25851,25219],{"className":25852,"style":25446},[25308,25309],[1291,25854,25856],{"className":25855},[25348],[1291,25857,25859,25888],{"className":25858},[25352,25353],[1291,25860,25862,25885],{"className":25861},[25357],[1291,25863,25865],{"className":25864,"style":25362},[25361],[1291,25866,25867,25870],{"style":25479},[1291,25868],{"className":25869,"style":25370},[25369],[1291,25871,25873],{"className":25872},[25374,25375,25376,25377],[1291,25874,25876,25879,25882],{"className":25875},[25308,25377],[1291,25877,25164],{"className":25878},[25308,25309,25377],[1291,25880,3566],{"className":25881},[25387,25377],[1291,25883,25181],{"className":25884,"style":25391},[25308,25309,25377],[1291,25886,25396],{"className":25887},[25395],[1291,25889,25891],{"className":25890},[25357],[1291,25892,25894],{"className":25893,"style":25403},[25361],[1291,25895],{},[1291,25897],{"className":25898,"style":25409},[25317],[1291,25900,3738],{"className":25901},[25413],[1291,25903],{"className":25904,"style":25409},[25317],[1291,25906,25908,25912],{"classN
ame":25907},[25299],[1291,25909],{"className":25910,"style":25911},[25303],"height:2.2519em;vertical-align:-0.8804em;",[1291,25913,25915,25920,26140],{"className":25914},[25308],[1291,25916],{"className":25917},[25918,25919],"mopen","nulldelimiter",[1291,25921,25923],{"className":25922},[25671],[1291,25924,25926,26131],{"className":25925},[25352,25353],[1291,25927,25929,26128],{"className":25928},[25357],[1291,25930,25933,26032,26043],{"className":25931,"style":25932},[25361],"height:1.3714em;",[1291,25934,25936,25940],{"style":25935},"top:-2.314em;",[1291,25937],{"className":25938,"style":25939},[25369],"height:3em;",[1291,25941,25943,25946,25949,25952,25955,25959,25962,25965,25968,25971,25974,25977,25980,25983,25986,25989,25992,25995,25998,26001,26004,26007,26011,26014,26017,26020,26023,26026,26029],{"className":25942},[25308],[1291,25944,25186],{"className":25945,"style":25427},[25308,25309],[1291,25947,25252],{"className":25948},[25308,25309],[1291,25950,4843],{"className":25951},[25308,25309],[1291,25953,77],{"className":25954},[25308,25309],[1291,25956,25749],{"className":25957,"style":25958},[25308,25309],"margin-right:0.01968em;",[1291,25960],{"className":25961,"style":25409},[25317],[1291,25963,19216],{"className":25964},[25308,25309],[1291,25966,25252],{"className":25967},[25308,25309],[1291,25969,694],{"className":25970},[25308],[1291,25972],{"className":25973,"style":25409},[25317],[1291,25975,25252],{"className":25976},[25308,25309],[1291,25978,9643],{"className":25979,"style":25313},[25308,25309],[1291,25981],{"className":25982,"style":25409},[25317],[1291,25984,4843],{"className":25985},[25308,25309],[1291,25987,25432],{"className":25988,"style":25431},[25308,25309],[1291,25990,25194],{"className":25991},[25308,25309],[1291,25993,17520],{"className":25994},[25308,25309],[1291,25996],{"className":25997,"style":25409},[25317],[1291,25999,9566],{"className":26000},[25308,25309],[1291,26002],{"className":26003,"style":25409},[25317],[1291,26005,25167],{"c
lassName":26006},[25308,25309],[1291,26008,26010],{"className":26009},[25308,25309],"oc",[1291,26012,25207],{"className":26013},[25308,25309],[1291,26015,25194],{"className":26016},[25308,25309],[1291,26018,19676],{"className":26019},[25308,25309],[1291,26021,19216],{"className":26022},[25308,25309],[1291,26024,4843],{"className":26025},[25308,25309],[1291,26027],{"className":26028,"style":25409},[25317],[1291,26030,25181],{"className":26031,"style":25391},[25308,25309],[1291,26033,26035,26038],{"style":26034},"top:-3.23em;",[1291,26036],{"className":26037,"style":25939},[25369],[1291,26039],{"className":26040,"style":26042},[26041],"frac-line","border-bottom-width:0.04em;",[1291,26044,26046,26049],{"style":26045},"top:-3.677em;",[1291,26047],{"className":26048,"style":25939},[25369],[1291,26050,26052,26055,26058,26061,26064,26067,26070,26073,26076,26079,26082,26085,26088,26092,26095,26098,26101,26104,26107,26110,26113,26116,26119,26122,26125],{"className":26051},[25308],[1291,26053,25186],{"className":26054,"style":25427},[25308,25309],[1291,26056,25432],{"className":26057,"style":25431},[25308,25309],[1291,26059,25194],{"className":26060},[25308,25309],[1291,26062],{"className":26063,"style":25409},[25317],[1291,26065,25164],{"className":26066},[25308,25309],[1291,26068],{"className":26069,"style":25409},[25317],[1291,26071,9643],{"className":26072,"style":25313},[25308,25309],[1291,26074,25442],{"className":26075},[25308,25309],[1291,26077,25204],{"className":26078,"style":25446},[25308,25309],[1291,26080,25207],{"className":26081},[25308,25309],[1291,26083,19676],{"className":26084},[25308,25309],[1291,26086,19216],{"className":26087},[25308,25309],[1291,26089,26091],{"className":26090,"style":25446},[25308,25309],"cy",[1291,26093],{"className":26094,"style":25409},[25317],[1291,26096,9566],{"className":26097},[25308,25309],[1291,26099],{"className":26100,"style":25409},[25317],[1291,26102,25167],{"className":26103},[25308,25309],[1291,26105,26010],{"className":
26106},[25308,25309],[1291,26108,25207],{"className":26109},[25308,25309],[1291,26111,25194],{"className":26112},[25308,25309],[1291,26114,19676],{"className":26115},[25308,25309],[1291,26117,19216],{"className":26118},[25308,25309],[1291,26120,4843],{"className":26121},[25308,25309],[1291,26123],{"className":26124,"style":25409},[25317],[1291,26126,25181],{"className":26127,"style":25391},[25308,25309],[1291,26129,25396],{"className":26130},[25395],[1291,26132,26134],{"className":26133},[25357],[1291,26135,26138],{"className":26136,"style":26137},[25361],"height:0.8804em;",[1291,26139],{},[1291,26141],{"className":26142},[26143,25919],"mclose",[1291,26145,26147],{"className":26146},[25134],[1291,26148,26150,26338],{"className":26149},[25138],[1291,26151,26153],{"className":26152},[25142],[25144,26154,26155],{"xmlns":25146,"display":2912},[25148,26156,26157,26335],{},[25151,26158,26159,26161,26163,26165,26167,26169,26171,26173,26175,26177,26179,26181,26183,26185,26187,26189,26191,26193,26195,26197,26199,26201,26203,26205,26217,26219,26221,26223,26226,26228,26333],{},[25154,26160,25233],{},[25154,26162,19216],{},[25154,26164,25238],{},[25154,26166,19676],{},[25154,26168,25191],{},[25154,26170,17520],{},[25154,26172,19676],{},[25154,26174,25249],{},[25154,26176,25252],{},[25154,26178,25214],{},[25154,26180,25207],{},[25154,26182,25194],{},[25154,26184,19676],{},[25154,26186,19216],{},[25154,26188,4843],{},[25154,26190,25197],{},[25154,26192,25191],{},[25154,26194,19676],{},[25154,26196,25204],{},[25154,26198,25207],{},[25154,26200,19676],{},[25154,26202,19216],{},[25154,26204,25214],{},[25169,26206,26207,26209],{},[25154,26208,25219],{},[25151,26210,26211,26213,26215],{},[25154,26212,25164],{},[25159,26214,3566],{"separator":1143},[25154,26216,25181],{},[25159,26218,3738],{},[25154,26220,25749],{},[25154,26222,25252],{},[25154,26224,26225],{},"g",[25159,26227,3816],{"stretchy":14406},[25671,26229,26230,26262],{},[25151,26231,26232,26234,26236,26238,26240,26242,26244,2
6246,26248,26250,26252,26254,26256,26258,26260],{},[25154,26233,25186],{},[25154,26235,25252],{},[25154,26237,4843],{},[25154,26239,77],{},[25154,26241,25749],{},[25684,26243,25686],{},[25154,26245,25167],{},[25154,26247,25252],{},[25154,26249,25214],{},[25154,26251,25207],{},[25154,26253,25194],{},[25154,26255,19676],{},[25154,26257,19216],{},[25154,26259,4843],{},[25154,26261,17520],{},[25151,26263,26264,26267,26269,26271,26273,26275,26277,26279,26281,26283,26285,26287,26289,26291,26293,26295,26297,26299,26301,26303,26305,26307,26309,26311,26313,26315,26317,26319,26321,26323,26325,26327,26329,26331],{},[25154,26265,26266],{},"N",[25154,26268,25252],{},[25154,26270,694],{"mathvariant":25758},[25684,26272,25686],{},[25154,26274,25252],{},[25154,26276,9643],{},[25684,26278,25686],{},[25154,26280,25167],{},[25154,26282,25252],{},[25154,26284,25214],{},[25154,26286,25207],{},[25154,26288,25194],{},[25154,26290,19676],{},[25154,26292,19216],{},[25154,26294,4843],{},[25154,26296,17520],{},[25684,26298,25686],{},[25154,26300,25214],{},[25154,26302,25252],{},[25154,26304,19216],{},[25154,26306,4843],{},[25154,26308,77],{},[25154,26310,25164],{},[25154,26312,19216],{},[25154,26314,25164],{},[25154,26316,19216],{},[25154,26318,26225],{},[25684,26320,25686],{},[25154,26322,4843],{},[25154,26324,19676],{},[25154,26326,25191],{},[25154,26328,25194],{},[25684,26330,25686],{},[25154,26332,25164],{},[25159,26334,713],{"stretchy":14406},[25288,26336,26337],{"encoding":25290},"Inverse Document Frequency_{i,j} = log(\\dfrac{Total \\; documents}{No. 
\\; of \\; documents \\; containing \\; term \\; i})",[1291,26339,26341,26450],{"className":26340,"ariaHidden":1143},[25295],[1291,26342,26344,26347,26350,26353,26356,26359,26362,26365,26368,26371,26374,26377,26380,26383,26386,26389,26392,26441,26444,26447],{"className":26343},[25299],[1291,26345],{"className":26346,"style":25423},[25303],[1291,26348,25233],{"className":26349,"style":25528},[25308,25309],[1291,26351,19216],{"className":26352},[25308,25309],[1291,26354,25238],{"className":26355,"style":25446},[25308,25309],[1291,26357,25538],{"className":26358},[25308,25309],[1291,26360,25207],{"className":26361},[25308,25309],[1291,26363,25194],{"className":26364},[25308,25309],[1291,26366,19676],{"className":26367},[25308,25309],[1291,26369,19216],{"className":26370},[25308,25309],[1291,26372,25554],{"className":26373,"style":25427},[25308,25309],[1291,26375,25442],{"className":26376},[25308,25309],[1291,26378,25204],{"className":26379,"style":25446},[25308,25309],[1291,26381,25207],{"className":26382},[25308,25309],[1291,26384,19676],{"className":26385},[25308,25309],[1291,26387,19216],{"className":26388},[25308,25309],[1291,26390,25214],{"className":26391},[25308,25309],[1291,26393,26395,26398],{"className":26394},[25308],[1291,26396,25219],{"className":26397,"style":25446},[25308,25309],[1291,26399,26401],{"className":26400},[25348],[1291,26402,26404,26433],{"className":26403},[25352,25353],[1291,26405,26407,26430],{"className":26406},[25357],[1291,26408,26410],{"className":26409,"style":25362},[25361],[1291,26411,26412,26415],{"style":25479},[1291,26413],{"className":26414,"style":25370},[25369],[1291,26416,26418],{"className":26417},[25374,25375,25376,25377],[1291,26419,26421,26424,26427],{"className":26420},[25308,25377],[1291,26422,25164],{"className":26423},[25308,25309,25377],[1291,26425,3566],{"className":26426},[25387,25377],[1291,26428,25181],{"className":26429,"style":25391},[25308,25309,25377],[1291,26431,25396],{"className":26432},[25395],[1291,26434
,26436],{"className":26435},[25357],[1291,26437,26439],{"className":26438,"style":25403},[25361],[1291,26440],{},[1291,26442],{"className":26443,"style":25409},[25317],[1291,26445,3738],{"className":26446},[25413],[1291,26448],{"className":26449,"style":25409},[25317],[1291,26451,26453,26456,26459,26462,26465,26468,26650],{"className":26452},[25299],[1291,26454],{"className":26455,"style":25911},[25303],[1291,26457,25749],{"className":26458,"style":25958},[25308,25309],[1291,26460,25252],{"className":26461},[25308,25309],[1291,26463,26225],{"className":26464,"style":25446},[25308,25309],[1291,26466,3816],{"className":26467},[25918],[1291,26469,26471,26474,26647],{"className":26470},[25308],[1291,26472],{"className":26473},[25918,25919],[1291,26475,26477],{"className":26476},[25671],[1291,26478,26480,26639],{"className":26479},[25352,25353],[1291,26481,26483,26636],{"className":26482},[25357],[1291,26484,26486,26578,26586],{"className":26485,"style":25932},[25361],[1291,26487,26488,26491],{"style":25935},[1291,26489],{"className":26490,"style":25939},[25369],[1291,26492,26494,26498,26501,26504,26507,26510,26513,26516,26519,26522,26525,26528,26531,26534,26537,26540,26543,26547,26550,26553,26557,26560,26563,26566,26569,26572,26575],{"className":26493},[25308],[1291,26495,26266],{"className":26496,"style":26497},[25308,25309],"margin-right:0.10903em;",[1291,26499,25252],{"className":26500},[25308,25309],[1291,26502,694],{"className":26503},[25308],[1291,26505],{"className":26506,"style":25409},[25317],[1291,26508,25252],{"className":26509},[25308,25309],[1291,26511,9643],{"className":26512,"style":25313},[25308,25309],[1291,26514],{"className":26515,"style":25409},[25317],[1291,26517,25167],{"className":26518},[25308,25309],[1291,26520,26010],{"className":26521},[25308,25309],[1291,26523,25207],{"className":26524},[25308,25309],[1291,26526,25194],{"className":26527},[25308,25309],[1291,26529,19676],{"className":26530},[25308,25309],[1291,26532,19216],{"className":26533}
,[25308,25309],[1291,26535,4843],{"className":26536},[25308,25309],[1291,26538,17520],{"className":26539},[25308,25309],[1291,26541],{"className":26542,"style":25409},[25317],[1291,26544,26546],{"className":26545},[25308,25309],"co",[1291,26548,19216],{"className":26549},[25308,25309],[1291,26551,4843],{"className":26552},[25308,25309],[1291,26554,26556],{"className":26555},[25308,25309],"ainin",[1291,26558,26225],{"className":26559,"style":25446},[25308,25309],[1291,26561],{"className":26562,"style":25409},[25317],[1291,26564,4843],{"className":26565},[25308,25309],[1291,26567,25432],{"className":26568,"style":25431},[25308,25309],[1291,26570,25194],{"className":26571},[25308,25309],[1291,26573],{"className":26574,"style":25409},[25317],[1291,26576,25164],{"className":26577},[25308,25309],[1291,26579,26580,26583],{"style":26034},[1291,26581],{"className":26582,"style":25939},[25369],[1291,26584],{"className":26585,"style":26042},[26041],[1291,26587,26588,26591],{"style":26045},[1291,26589],{"className":26590,"style":25939},[25369],[1291,26592,26594,26597,26600,26603,26606,26609,26612,26615,26618,26621,26624,26627,26630,26633],{"className":26593},[25308],[1291,26595,25186],{"className":26596,"style":25427},[25308,25309],[1291,26598,25252],{"className":26599},[25308,25309],[1291,26601,4843],{"className":26602},[25308,25309],[1291,26604,77],{"className":26605},[25308,25309],[1291,26607,25749],{"className":26608,"style":25958},[25308,25309],[1291,26610],{"className":26611,"style":25409},[25317],[1291,26613,25167],{"className":26614},[25308,25309],[1291,26616,26010],{"className":26617},[25308,25309],[1291,26619,25207],{"className":26620},[25308,25309],[1291,26622,25194],{"className":26623},[25308,25309],[1291,26625,19676],{"className":26626},[25308,25309],[1291,26628,19216],{"className":26629},[25308,25309],[1291,26631,4843],{"className":26632},[25308,25309],[1291,26634,17520],{"className":26635},[25308,25309],[1291,26637,25396],{"className":26638},[25395],[1291,26640,2
6642],{"className":26641},[25357],[1291,26643,26645],{"className":26644,"style":26137},[25361],[1291,26646],{},[1291,26648],{"className":26649},[26143,25919],[1291,26651,713],{"className":26652},[26143],[73,26654,26655],{},"Here’s the representation through a code snippet, the code does the following:",[665,26657,26658,26661,26664],{},[148,26659,26660],{},"Vectorization with TF-IDF: TfidfVectorizer computes term frequency-inverse document frequency scores for each word in each document.",[148,26662,26663],{},"Term Importance: TF-IDF highlights words that are important to a particular query but uncommon in the overall corpus, providing a more meaningful measure than raw counts.",[148,26665,26666],{},"Data Representation: The resulting matrix is converted into a DataFrame, making it easy to see which words are most “informative” across all search queries.",[3418,26668,26670],{"className":3420,"code":26669,"language":3422,"meta":23,"style":23},"##tf-idf representation \nfrom sklearn.feature_extraction.text import TfidfVectorizer \nvectorizer_tfidf = TfidfVectorizer() \nX_tfidf = vectorizer_tfidf.fit_transform(corpus)\nX_tfidf_dense = X_tfidf.toarray()\ndf_tfidf=pd.DataFrame(X_tfidf_dense,columns=vectorizer_tfidf.get_feature_names_out()) df_tfidf.insert(0, 'Index', range(len(corpus)))\nprint(\"\\nTF-IDF Table:\")\nprint(df_tfidf.to_string(index=False))\n",[3061,26671,26672,26677,26696,26710,26730,26746,26813,26831],{"__ignoreMap":23},[1291,26673,26674],{"class":3427,"line":3428},[1291,26675,26676],{"class":3673},"##tf-idf representation \n",[1291,26678,26679,26681,26683,26685,26687,26689,26691,26693],{"class":3427,"line":24},[1291,26680,3550],{"class":3475},[1291,26682,24058],{"class":3431},[1291,26684,694],{"class":3435},[1291,26686,24063],{"class":3431},[1291,26688,694],{"class":3435},[1291,26690,24068],{"class":3431},[1291,26692,3476],{"class":3475},[1291,26694,26695],{"class":3431}," TfidfVectorizer 
\n",[1291,26697,26698,26701,26703,26706,26708],{"class":3427,"line":675},[1291,26699,26700],{"class":3431},"vectorizer_tfidf ",[1291,26702,3738],{"class":3435},[1291,26704,26705],{"class":3812}," TfidfVectorizer",[1291,26707,12394],{"class":3435},[1291,26709,7743],{"class":3431},[1291,26711,26712,26715,26717,26720,26722,26724,26726,26728],{"class":3427,"line":3542},[1291,26713,26714],{"class":3431},"X_tfidf ",[1291,26716,3738],{"class":3435},[1291,26718,26719],{"class":3431}," vectorizer_tfidf",[1291,26721,694],{"class":3435},[1291,26723,24392],{"class":3812},[1291,26725,3816],{"class":3435},[1291,26727,24397],{"class":3812},[1291,26729,3827],{"class":3435},[1291,26731,26732,26735,26737,26740,26742,26744],{"class":3427,"line":3547},[1291,26733,26734],{"class":3431},"X_tfidf_dense ",[1291,26736,3738],{"class":3435},[1291,26738,26739],{"class":3431}," X_tfidf",[1291,26741,694],{"class":3435},[1291,26743,24416],{"class":3812},[1291,26745,4871],{"class":3435},[1291,26747,26748,26751,26753,26755,26757,26759,26761,26764,26766,26768,26770,26773,26775,26777,26779,26782,26784,26786,26788,26790,26792,26794,26796,26798,26800,26802,26804,26806,26808,26810],{"class":3427,"line":3572},[1291,26749,26750],{"class":3431},"df_tfidf",[1291,26752,3738],{"class":3435},[1291,26754,12330],{"class":3431},[1291,26756,694],{"class":3435},[1291,26758,12335],{"class":3812},[1291,26760,3816],{"class":3435},[1291,26762,26763],{"class":3812},"X_tfidf_dense",[1291,26765,3566],{"class":3435},[1291,26767,24446],{"class":3819},[1291,26769,3738],{"class":3435},[1291,26771,26772],{"class":3812},"vectorizer_tfidf",[1291,26774,694],{"class":3435},[1291,26776,24456],{"class":3812},[1291,26778,24459],{"class":3435},[1291,26780,26781],{"class":3431}," 
df_tfidf",[1291,26783,694],{"class":3435},[1291,26785,24467],{"class":3812},[1291,26787,3816],{"class":3435},[1291,26789,9555],{"class":3451},[1291,26791,3566],{"class":3435},[1291,26793,6415],{"class":3435},[1291,26795,24478],{"class":3439},[1291,26797,3436],{"class":3435},[1291,26799,3566],{"class":3435},[1291,26801,24485],{"class":3812},[1291,26803,3816],{"class":3435},[1291,26805,24490],{"class":3812},[1291,26807,3816],{"class":3435},[1291,26809,24397],{"class":3812},[1291,26811,26812],{"class":3435},")))\n",[1291,26814,26815,26817,26819,26821,26824,26827,26829],{"class":3427,"line":3614},[1291,26816,4986],{"class":3812},[1291,26818,3816],{"class":3435},[1291,26820,3691],{"class":3435},[1291,26822,26823],{"class":3431},"\\n",[1291,26825,26826],{"class":3439},"TF-IDF Table:",[1291,26828,3691],{"class":3435},[1291,26830,3827],{"class":3435},[1291,26832,26833,26835,26837,26839,26841,26843,26845,26847],{"class":3427,"line":3640},[1291,26834,4986],{"class":3812},[1291,26836,3816],{"class":3435},[1291,26838,26750],{"class":3812},[1291,26840,694],{"class":3435},[1291,26842,24529],{"class":3812},[1291,26844,3816],{"class":3435},[1291,26846,16885],{"class":3819},[1291,26848,11292],{"class":3435},[73,26850,26851],{},"The above snippet of code converts our corpus into a TF-IDF representation, which measures the importance of words in each document relative to the entire corpus. Using TfidfVectorizer from the sklearn library, it computes the TF-IDF values for all words, transforms the data into a dense array, and organizes it into a dataframe for better readability. 
The final table shows each word's relevance across the documents, making it useful for text analysis tasks like identifying significant terms in a dataset.",[73,26853,26854],{},[15804,26855,26826],{},[1141,26857],{":zoomable":1143,"alt":23,"className":26858,"sizes":16088,"src":26859},[23361],"\u002Fassets\u002Fcontent\u002Fblog\u002Ftext-embeddings-article\u002Ftf-idf-table.png",[140,26861,26863],{"id":26862},"issues-with-the-traditional-models","Issues with the traditional models",[73,26865,26866],{},"While BoW and TF-IDF are important building blocks, they have limitations. They rely heavily on frequency, do not consider word order, and often produce sparse, high-dimensional representations that are computationally expensive. These models fail to capture nuanced semantic relationships or contextual meanings.",[73,26868,26869,26870,26873],{},"This is where Text embedding shines, models such as Word2Vec, GLoVe, and BERT, represent words as dense high-dimensional vectors. This way, similar words can be represented closer to each other and the context of the words is captured. For example, the words “happy” and “joyful” are closer in the embedding space. Also, the ",[169,26871,26872],{},"word order is preserved",". Sentences such as “The man bit the dog” vs “The dog bit the man” are interpreted differently, while the earlier models would capture no difference between them.",[73,26875,26876],{},"We'll now walk through two of the most popular and highly advanced models, Word2Vec and GloVe.",[140,26878,23970],{"id":26879},"word2vec",[73,26881,26882],{},"The Word2Vec model, developed by Google in 2013, is a highly influential machine learning model widely used in Natural Language Processing (NLP). It learns vector representations of words such that words with similar meanings appear close to each other in the vector space. 
Two primary methods drive this learning process:",[145,26884,26885,26888],{},[148,26886,26887],{},"Continuous Bag of Words (CBOW)",[148,26889,26890],{},"Skip-gram",[73,26892,26893],{},"This approach is often summarized by the phrase: \"You shall know a word by the company it keeps.\"",[140,26895,26897],{"id":26896},"continuous-bag-of-words-cbow","Continuous Bag Of Words (CBOW):",[73,26899,26900],{},"Here, instead of just using the count of each word, CBOW uses a sliding window to predict a target word based on its surrounding words (context). For example:",[1141,26902],{":zoomable":1143,"alt":23,"className":26903,"sizes":16088,"src":26904},[23361],"\u002Fassets\u002Fcontent\u002Fblog\u002Ftext-embeddings-article\u002Fpathway-pipelines-are-really-cool-text.png",[73,26906,26907],{},"Here the word “pipelines” is the target word, whereas the other surrounding words provide the context.",[73,26909,26910,26911,26914,26915,26918,26919,26922,26923,26926],{},"The CBOW uses a ",[169,26912,26913],{},"simple neural network"," to process the probabilities of the suggestion. This includes an ",[169,26916,26917],{},"input layer",", a single fully connected hidden layer also known as the ",[169,26920,26921],{},"projection layer",", and an ",[169,26924,26925],{},"output layer",". ",[73,26928,26929],{},"Here’s a simplified diagram of the neural network used by the CBOW method",[1141,26931],{":zoomable":1143,"alt":23,"className":26932,"sizes":16088,"src":26933},[23361],"\u002Fassets\u002Fcontent\u002Fblog\u002Ftext-embeddings-article\u002Fneural-network-cbow-method.png",[73,26935,26936,26938,26939,26942,26943,26946],{},[169,26937,26890],{}," This method works ",[169,26940,26941],{},"inversely"," to the CBOW, where the ",[169,26944,26945],{},"target word is known"," and the model tries to guess the context using it. 
It is more effective in identifying less frequent relationships and capturing more nuanced semantic patterns.",[73,26948,26949],{},"Here’s a diagram for the Skip-gram method:",[1141,26951],{":zoomable":1143,"alt":23,"className":26952,"sizes":16088,"src":26953},[23361],"\u002Fassets\u002Fcontent\u002Fblog\u002Ftext-embeddings-article\u002Fskip-gram-method.png",[3189,26955,26957],{"id":26956},"sample-code-using-word2vec","Sample Code Using Word2Vec",[3418,26959,26961],{"className":3420,"code":26960,"language":3422,"meta":23,"style":23},"import gensim.downloader as api\nmodel = api.load(\"word2vec-google-news-300\")\n\nexample_word = \"computer\"\nsimilar_words = model.most_similar(example_word, topn=10)\nprint(f\"Top 10 words similar to '{example_word}':\")\nfor word, similarity in similar_words:\n    print(f\"{word}: {similarity:.4f}\")\n",[3061,26962,26963,26980,27005,27009,27023,27054,27076,27095],{"__ignoreMap":23},[1291,26964,26965,26967,26970,26972,26975,26977],{"class":3427,"line":3428},[1291,26966,3476],{"class":3475},[1291,26968,26969],{"class":3431}," gensim",[1291,26971,694],{"class":3435},[1291,26973,26974],{"class":3457},"downloader",[1291,26976,3506],{"class":3475},[1291,26978,26979],{"class":3431}," api\n",[1291,26981,26982,26985,26987,26990,26992,26994,26996,26998,27001,27003],{"class":3427,"line":24},[1291,26983,26984],{"class":3431},"model ",[1291,26986,3738],{"class":3435},[1291,26988,26989],{"class":3431}," api",[1291,26991,694],{"class":3435},[1291,26993,9884],{"class":3812},[1291,26995,3816],{"class":3435},[1291,26997,3691],{"class":3435},[1291,26999,27000],{"class":3439},"word2vec-google-news-300",[1291,27002,3691],{"class":3435},[1291,27004,3827],{"class":3435},[1291,27006,27007],{"class":3427,"line":675},[1291,27008,3526],{"emptyLinePlaceholder":35},[1291,27010,27011,27014,27016,27018,27021],{"class":3427,"line":3542},[1291,27012,27013],{"class":3431},"example_word 
",[1291,27015,3738],{"class":3435},[1291,27017,3705],{"class":3435},[1291,27019,27020],{"class":3439},"computer",[1291,27022,3746],{"class":3435},[1291,27024,27025,27028,27030,27033,27035,27038,27040,27043,27045,27048,27050,27052],{"class":3427,"line":3547},[1291,27026,27027],{"class":3431},"similar_words ",[1291,27029,3738],{"class":3435},[1291,27031,27032],{"class":3431}," model",[1291,27034,694],{"class":3435},[1291,27036,27037],{"class":3812},"most_similar",[1291,27039,3816],{"class":3435},[1291,27041,27042],{"class":3812},"example_word",[1291,27044,3566],{"class":3435},[1291,27046,27047],{"class":3819}," topn",[1291,27049,3738],{"class":3435},[1291,27051,6769],{"class":3451},[1291,27053,3827],{"class":3435},[1291,27055,27056,27058,27060,27062,27065,27067,27069,27071,27074],{"class":3427,"line":3572},[1291,27057,4986],{"class":3812},[1291,27059,3816],{"class":3435},[1291,27061,9643],{"class":7739},[1291,27063,27064],{"class":3439},"\"Top 10 words similar to '",[1291,27066,8770],{"class":3451},[1291,27068,27042],{"class":3812},[1291,27070,9671],{"class":3451},[1291,27072,27073],{"class":3439},"':\"",[1291,27075,3827],{"class":3435},[1291,27077,27078,27080,27083,27085,27088,27090,27093],{"class":3427,"line":3614},[1291,27079,24227],{"class":3475},[1291,27081,27082],{"class":3431}," word",[1291,27084,3566],{"class":3435},[1291,27086,27087],{"class":3431}," similarity ",[1291,27089,9566],{"class":3475},[1291,27091,27092],{"class":3431}," similar_words",[1291,27094,5243],{"class":3435},[1291,27096,27097,27100,27102,27104,27106,27108,27111,27113,27116,27118,27121,27124,27126,27128],{"class":3427,"line":3640},[1291,27098,27099],{"class":3812},"    print",[1291,27101,3816],{"class":3435},[1291,27103,9643],{"class":7739},[1291,27105,3691],{"class":3439},[1291,27107,8770],{"class":3451},[1291,27109,27110],{"class":3812},"word",[1291,27112,9671],{"class":3451},[1291,27114,27115],{"class":3439},": 
",[1291,27117,8770],{"class":3451},[1291,27119,27120],{"class":3812},"similarity",[1291,27122,27123],{"class":7739},":.4f",[1291,27125,9671],{"class":3451},[1291,27127,3691],{"class":3439},[1291,27129,3827],{"class":3435},[73,27131,27132],{},"This code does the following:",[665,27134,27135,27141,27147],{},[148,27136,27137,27140],{},[169,27138,27139],{},"Pre-trained Model Loading",": The code loads the pre-trained Google News Word2Vec model, which already has learned word meanings from a vast corpus of text.",[148,27142,27143,27146],{},[169,27144,27145],{},"Semantic Similarity",": most_similar() finds words closest in meaning to the example word “computer” by examining the vector space learned by the model.",[148,27148,27149,27152],{},[169,27150,27151],{},"Output",": The top 10 similar words and their similarity scores are printed, showcasing how Word2Vec captures semantic relationships rather than just word frequency.",[73,27154,27155],{},[15804,27156,27157],{},"Output of the above example:",[3418,27159,27162],{"className":6347,"code":27160,"filename":27161,"language":6349,"meta":23,"style":23},"Top 10 words similar to 'computer':\ncomputers: 0.7979\nlaptop: 0.6640\nlaptop_computer: 0.6549\nComputer: 0.6473\ncom_puter: 0.6082\ntechnician_Leonard_Luchko: 0.5663\nmainframes_minicomputers: 0.5618\nlaptop_computers: 0.5585\nPC: 0.5540\nmaker_Dell_DELL.O: 0.5519\n","Code output",[3061,27163,27164,27189,27197,27205,27213,27221,27229,27237,27245,27253,27261],{"__ignoreMap":23},[1291,27165,27166,27169,27172,27175,27178,27181,27183,27185,27187],{"class":3427,"line":3428},[1291,27167,27168],{"class":6356},"Top",[1291,27170,27171],{"class":3451}," 10",[1291,27173,27174],{"class":3439}," words",[1291,27176,27177],{"class":3439}," similar",[1291,27179,27180],{"class":3439}," 
to",[1291,27182,6415],{"class":3435},[1291,27184,27020],{"class":3439},[1291,27186,3436],{"class":3435},[1291,27188,5243],{"class":3439},[1291,27190,27191,27194],{"class":3427,"line":24},[1291,27192,27193],{"class":6356},"computers:",[1291,27195,27196],{"class":3451}," 0.7979\n",[1291,27198,27199,27202],{"class":3427,"line":675},[1291,27200,27201],{"class":6356},"laptop:",[1291,27203,27204],{"class":3451}," 0.6640\n",[1291,27206,27207,27210],{"class":3427,"line":3542},[1291,27208,27209],{"class":6356},"laptop_computer:",[1291,27211,27212],{"class":3451}," 0.6549\n",[1291,27214,27215,27218],{"class":3427,"line":3547},[1291,27216,27217],{"class":6356},"Computer:",[1291,27219,27220],{"class":3451}," 0.6473\n",[1291,27222,27223,27226],{"class":3427,"line":3572},[1291,27224,27225],{"class":6356},"com_puter:",[1291,27227,27228],{"class":3451}," 0.6082\n",[1291,27230,27231,27234],{"class":3427,"line":3614},[1291,27232,27233],{"class":6356},"technician_Leonard_Luchko:",[1291,27235,27236],{"class":3451}," 0.5663\n",[1291,27238,27239,27242],{"class":3427,"line":3640},[1291,27240,27241],{"class":6356},"mainframes_minicomputers:",[1291,27243,27244],{"class":3451}," 0.5618\n",[1291,27246,27247,27250],{"class":3427,"line":3665},[1291,27248,27249],{"class":6356},"laptop_computers:",[1291,27251,27252],{"class":3451}," 0.5585\n",[1291,27254,27255,27258],{"class":3427,"line":3670},[1291,27256,27257],{"class":6356},"PC:",[1291,27259,27260],{"class":3451}," 0.5540\n",[1291,27262,27263,27266],{"class":3427,"line":3677},[1291,27264,27265],{"class":6356},"maker_Dell_DELL.O:",[1291,27267,27268],{"class":3451}," 0.5519\n",[1141,27270],{":zoomable":1143,"alt":23,"className":27271,"sizes":16088,"src":27272},[23361],"\u002Fassets\u002Fcontent\u002Fblog\u002Ftext-embeddings-article\u002Fembedding-vector-for-computer.png",[140,27274,27276],{"id":27275},"global-representation-of-vectors-glove","Global Representation of Vectors (GloVe)",[73,27278,27279,27280,27285],{},"The GloVe method, developed 
at Stanford University by Jeffrey Pennington and collaborators, is known as Global Vectors because it leverages the entire corpus’s global statistics. Unlike Word2Vec, which focuses on local context windows, GloVe constructs a ",[169,27281,27282],{},[15804,27283,27284],{},"co-occurrence matrix"," that measures how frequently pairs of words appear together. Using this global perspective, GloVe effectively captures both semantic and syntactic relationships, making it especially powerful for discovering word analogies.",[1141,27287],{":zoomable":1143,"alt":23,"className":27288,"sizes":16088,"src":27289},[23361],"\u002Fassets\u002Fcontent\u002Fblog\u002Ftext-embeddings-article\u002Fglobal-representation-of-vectors.png",[140,27291,27293],{"id":27292},"co-occurrence-matrix-example","Co-occurrence Matrix Example",[73,27295,27296],{},"Consider a small sample corpus:",[3418,27298,27300],{"className":3420,"code":27299,"language":3422,"meta":23,"style":23},"corpus = [\n\"I like pathway\",\"I like NLP\",\"I enjoy pathway\",\n\"deep learning is fun\",\"NLP is amazing\",\"pathway is fun\"\n]\n",[3061,27301,27302,27310,27339,27366],{"__ignoreMap":23},[1291,27303,27304,27306,27308],{"class":3427,"line":3428},[1291,27305,24085],{"class":3431},[1291,27307,3738],{"class":3435},[1291,27309,6785],{"class":3435},[1291,27311,27312,27314,27317,27319,27321,27323,27326,27328,27330,27332,27335,27337],{"class":3427,"line":24},[1291,27313,3691],{"class":3435},[1291,27315,27316],{"class":3439},"I like pathway",[1291,27318,3691],{"class":3435},[1291,27320,3566],{"class":3435},[1291,27322,3691],{"class":3435},[1291,27324,27325],{"class":3439},"I like NLP",[1291,27327,3691],{"class":3435},[1291,27329,3566],{"class":3435},[1291,27331,3691],{"class":3435},[1291,27333,27334],{"class":3439},"I enjoy 
pathway",[1291,27336,3691],{"class":3435},[1291,27338,4107],{"class":3435},[1291,27340,27341,27343,27346,27348,27350,27352,27355,27357,27359,27361,27364],{"class":3427,"line":675},[1291,27342,3691],{"class":3435},[1291,27344,27345],{"class":3439},"deep learning is fun",[1291,27347,3691],{"class":3435},[1291,27349,3566],{"class":3435},[1291,27351,3691],{"class":3435},[1291,27353,27354],{"class":3439},"NLP is amazing",[1291,27356,3691],{"class":3435},[1291,27358,3566],{"class":3435},[1291,27360,3691],{"class":3435},[1291,27362,27363],{"class":3439},"pathway is fun",[1291,27365,3746],{"class":3435},[1291,27367,27368],{"class":3427,"line":3542},[1291,27369,5267],{"class":3435},[1141,27371],{":zoomable":1143,"alt":23,"className":27372,"sizes":16088,"src":27373},[23361],"\u002Fassets\u002Fcontent\u002Fblog\u002Ftext-embeddings-article\u002Fco-occurrence.png",[73,27375,27376,27377,27382],{},"With a ",[169,27378,27379],{},[15804,27380,27381],{},"window size of 2",", the model considers each target word and the two words surrounding it. 
This approach helps the model understand deeper relationships between words beyond simple frequency counts.",[3189,27384,27386],{"id":27385},"sample-code-using-glove","Sample Code Using GloVe",[3418,27388,27390],{"className":3420,"code":27389,"language":3422,"meta":23,"style":23},"glove_file = 'glove.6B.100d.txt'\nglove_model = load_glove_model(glove_file)\ndef find_similar_words(word, model, topn=10):\n     word_vector = model[word]\n     similarities = {}\n     for other_word, other_vector in model.items():\n         if other_word != word:\n            similarity = cosine_similarity([word_vector],  [other_vector])[0][0]\n            similarities[other_word] = similarity\n     similar_words = sorted(similarities.items(), key=lambda item: item[1], reverse=True)[:topn]\n     return similar_words\nexample_word = \"lovely\"\nsimilar_words_glove = find_similar_words(example_word, glove_model, topn=10)\nprint(f\"Top 10 words similar to '{example_word}' (GloVe):\")\nfor word, similarity in similar_words_glove:\n    print(f\"{word}: {similarity:.4f}\")\n",[3061,27391,27392,27406,27423,27448,27463,27472,27497,27512,27546,27563,27616,27624,27637,27665,27686,27703],{"__ignoreMap":23},[1291,27393,27394,27397,27399,27401,27404],{"class":3427,"line":3428},[1291,27395,27396],{"class":3431},"glove_file ",[1291,27398,3738],{"class":3435},[1291,27400,6415],{"class":3435},[1291,27402,27403],{"class":3439},"glove.6B.100d.txt",[1291,27405,5188],{"class":3435},[1291,27407,27408,27411,27413,27416,27418,27421],{"class":3427,"line":24},[1291,27409,27410],{"class":3431},"glove_model ",[1291,27412,3738],{"class":3435},[1291,27414,27415],{"class":3812}," load_glove_model",[1291,27417,3816],{"class":3435},[1291,27419,27420],{"class":3812},"glove_file",[1291,27422,3827],{"class":3435},[1291,27424,27425,27427,27430,27432,27434,27436,27438,27440,27442,27444,27446],{"class":3427,"line":675},[1291,27426,11398],{"class":7739},[1291,27428,27429],{"class":3812}," 
find_similar_words",[1291,27431,3816],{"class":3435},[1291,27433,27110],{"class":3819},[1291,27435,3566],{"class":3435},[1291,27437,27032],{"class":3819},[1291,27439,3566],{"class":3435},[1291,27441,27047],{"class":3819},[1291,27443,3738],{"class":3435},[1291,27445,6769],{"class":3451},[1291,27447,11948],{"class":3435},[1291,27449,27450,27453,27455,27457,27459,27461],{"class":3427,"line":3542},[1291,27451,27452],{"class":3431},"     word_vector ",[1291,27454,3738],{"class":3435},[1291,27456,27032],{"class":3431},[1291,27458,3688],{"class":3435},[1291,27460,27110],{"class":3431},[1291,27462,5267],{"class":3435},[1291,27464,27465,27468,27470],{"class":3427,"line":3547},[1291,27466,27467],{"class":3431},"     similarities ",[1291,27469,3738],{"class":3435},[1291,27471,7884],{"class":3435},[1291,27473,27474,27477,27480,27482,27485,27487,27489,27491,27494],{"class":3427,"line":3572},[1291,27475,27476],{"class":3475},"     for",[1291,27478,27479],{"class":3431}," other_word",[1291,27481,3566],{"class":3435},[1291,27483,27484],{"class":3431}," other_vector ",[1291,27486,9566],{"class":3475},[1291,27488,27032],{"class":3431},[1291,27490,694],{"class":3435},[1291,27492,27493],{"class":3812},"items",[1291,27495,27496],{"class":3435},"():\n",[1291,27498,27499,27502,27505,27508,27510],{"class":3427,"line":3614},[1291,27500,27501],{"class":3475},"         if",[1291,27503,27504],{"class":3431}," other_word ",[1291,27506,27507],{"class":3435},"!=",[1291,27509,27082],{"class":3431},[1291,27511,5243],{"class":3435},[1291,27513,27514,27517,27519,27522,27524,27527,27529,27532,27535,27538,27540,27542,27544],{"class":3427,"line":3640},[1291,27515,27516],{"class":3431},"            similarity ",[1291,27518,3738],{"class":3435},[1291,27520,27521],{"class":3812}," cosine_similarity",[1291,27523,9547],{"class":3435},[1291,27525,27526],{"class":3812},"word_vector",[1291,27528,19075],{"class":3435},[1291,27530,27531],{"class":3435},"  
[",[1291,27533,27534],{"class":3812},"other_vector",[1291,27536,27537],{"class":3435},"])[",[1291,27539,9555],{"class":3451},[1291,27541,20061],{"class":3435},[1291,27543,9555],{"class":3451},[1291,27545,5267],{"class":3435},[1291,27547,27548,27551,27553,27556,27558,27560],{"class":3427,"line":3665},[1291,27549,27550],{"class":3431},"            similarities",[1291,27552,3688],{"class":3435},[1291,27554,27555],{"class":3431},"other_word",[1291,27557,3699],{"class":3435},[1291,27559,3702],{"class":3435},[1291,27561,27562],{"class":3431}," similarity\n",[1291,27564,27565,27568,27570,27573,27575,27578,27580,27582,27584,27587,27589,27592,27595,27597,27599,27601,27603,27605,27608,27611,27614],{"class":3427,"line":3670},[1291,27566,27567],{"class":3431},"     similar_words ",[1291,27569,3738],{"class":3435},[1291,27571,27572],{"class":3812}," sorted",[1291,27574,3816],{"class":3435},[1291,27576,27577],{"class":3812},"similarities",[1291,27579,694],{"class":3435},[1291,27581,27493],{"class":3812},[1291,27583,10858],{"class":3435},[1291,27585,27586],{"class":3819}," key",[1291,27588,3738],{"class":3435},[1291,27590,27591],{"class":7739},"lambda",[1291,27593,27594],{"class":3819}," item",[1291,27596,4390],{"class":3435},[1291,27598,27594],{"class":3812},[1291,27600,3688],{"class":3435},[1291,27602,24626],{"class":3451},[1291,27604,19075],{"class":3435},[1291,27606,27607],{"class":3819}," reverse",[1291,27609,27610],{"class":3435},"=True)[:",[1291,27612,27613],{"class":3431},"topn",[1291,27615,5267],{"class":3435},[1291,27617,27618,27621],{"class":3427,"line":3677},[1291,27619,27620],{"class":3475},"     return",[1291,27622,27623],{"class":3431}," 
similar_words\n",[1291,27625,27626,27628,27630,27632,27635],{"class":3427,"line":3877},[1291,27627,27013],{"class":3431},[1291,27629,3738],{"class":3435},[1291,27631,3705],{"class":3435},[1291,27633,27634],{"class":3439},"lovely",[1291,27636,3746],{"class":3435},[1291,27638,27639,27642,27644,27646,27648,27650,27652,27655,27657,27659,27661,27663],{"class":3427,"line":3916},[1291,27640,27641],{"class":3431},"similar_words_glove ",[1291,27643,3738],{"class":3435},[1291,27645,27429],{"class":3812},[1291,27647,3816],{"class":3435},[1291,27649,27042],{"class":3812},[1291,27651,3566],{"class":3435},[1291,27653,27654],{"class":3812}," glove_model",[1291,27656,3566],{"class":3435},[1291,27658,27047],{"class":3819},[1291,27660,3738],{"class":3435},[1291,27662,6769],{"class":3451},[1291,27664,3827],{"class":3435},[1291,27666,27667,27669,27671,27673,27675,27677,27679,27681,27684],{"class":3427,"line":4519},[1291,27668,4986],{"class":3812},[1291,27670,3816],{"class":3435},[1291,27672,9643],{"class":7739},[1291,27674,27064],{"class":3439},[1291,27676,8770],{"class":3451},[1291,27678,27042],{"class":3812},[1291,27680,9671],{"class":3451},[1291,27682,27683],{"class":3439},"' (GloVe):\"",[1291,27685,3827],{"class":3435},[1291,27687,27688,27690,27692,27694,27696,27698,27701],{"class":3427,"line":6038},[1291,27689,24227],{"class":3475},[1291,27691,27082],{"class":3431},[1291,27693,3566],{"class":3435},[1291,27695,27087],{"class":3431},[1291,27697,9566],{"class":3475},[1291,27699,27700],{"class":3431}," 
similar_words_glove",[1291,27702,5243],{"class":3435},[1291,27704,27705,27707,27709,27711,27713,27715,27717,27719,27721,27723,27725,27727,27729,27731],{"class":3427,"line":6043},[1291,27706,27099],{"class":3812},[1291,27708,3816],{"class":3435},[1291,27710,9643],{"class":7739},[1291,27712,3691],{"class":3439},[1291,27714,8770],{"class":3451},[1291,27716,27110],{"class":3812},[1291,27718,9671],{"class":3451},[1291,27720,27115],{"class":3439},[1291,27722,8770],{"class":3451},[1291,27724,27120],{"class":3812},[1291,27726,27123],{"class":7739},[1291,27728,9671],{"class":3451},[1291,27730,3691],{"class":3439},[1291,27732,3827],{"class":3435},[73,27734,27735],{},"Explanation:",[665,27737,27738,27747,27756],{},[148,27739,27740,27743,27744,27746],{},[169,27741,27742],{},"Loading the GloVe Model",": The ",[3061,27745,27403],{}," file contains the pre-trained GloVe embeddings. The “6B” indicates it was trained on a dataset with 6 billion tokens, and “100d” means each word is represented by a 100-dimensional vector.",[148,27748,27749,27743,27752,27755],{},[169,27750,27751],{},"Finding Similar Words",[3061,27753,27754],{},"find_similar_words()"," function calculates the cosine similarity between the target word’s vector and every other word’s vector in the model, returning the top matches.",[148,27757,27758,27761],{},[169,27759,27760],{},"Global Context",": Unlike frequency-based models, GloVe embeddings capture both local context and global statistical properties, enabling more nuanced relationships to emerge.",[73,27763,27764],{},[15804,27765,27766],{},"Output:",[1141,27768],{":zoomable":1143,"alt":23,"className":27769,"sizes":16088,"src":27770},[23361],"\u002Fassets\u002Fcontent\u002Fblog\u002Ftext-embeddings-article\u002Ftop-10-words.png",[140,27772,27774],{"id":27773},"how-text-embeddings-capture-analogies","How text embeddings capture analogies",[73,27776,27777],{},"Let us now try to understand how Text Embeddings capture the contextual relationships between countries 
and their capitals or verbs and tenses using GloVe embeddings, t-SNE, and PCA for visualization.\nAs you've already looked through GloVe before, let us go over t-SNE and PCA in the following sections.",[140,27779,27781],{"id":27780},"dimensionality-reduction-techniques-for-visualization","Dimensionality Reduction Techniques for Visualization",[3189,27783,27785],{"id":27784},"t-sne-t-distributed-stochastic-neighbor-embedding","t-SNE (t-distributed Stochastic Neighbor Embedding)",[73,27787,27788,27789,3126,27792,27795],{},"t-SNE, short for t-distributed Stochastic Neighbor Embedding, is an unsupervised machine learning algorithm for dimensionality reduction that is ideal for visualizing high-dimensional data. It was developed in 2008 by ",[169,27790,27791],{},"Laurens van der Maaten",[169,27793,27794],{},"Geoffrey Hinton",". The process involves embedding high-dimensional points in low dimensions so that data loss is kept to a minimum. It preserves similarities between points: nearby points in the high-dimensional space correspond to nearby embedded low-dimensional points. The same applies to distant points.",[3189,27797,27799],{"id":27798},"pca-principal-component-analysis","PCA (Principal Component Analysis)",[73,27801,27802,27803,27806],{},"The PCA unsupervised machine learning algorithm, which stands for Principal Component Analysis, was invented by the mathematician ",[169,27804,27805],{},"Karl Pearson"," in 1901. This method also focuses on reducing dimensionality. Data in the high-dimensional space is mapped to data in the lower-dimensional space such that the variance of the data in the lower-dimensional space is maximized. 
It is a statistical process that uses an orthogonal transformation and converts a set of correlated variables to a set of uncorrelated variables.",[73,27808,27809],{},"Now that you are well equipped with these powerful dimensionality reduction techniques, let’s move on to our fun analogies.",[73,27811,27812,27813,27816,27817,27820,27821,27824],{},"The fundamental approach of Text embedding is that you can represent ",[169,27814,27815],{},"words as vectors"," where every word can be expressed as a ",[169,27818,27819],{},"numerical vector in a high-dimensional space",". Let's take the word 'king' for example. You can represent 'king' in an n-dimensional space where it will have n attributes. Words that appear in a similar context to 'king' will be ",[169,27822,27823],{},"closer"," to it in the embedding space. You can perform certain vector operations to reveal relationships between different words. The relationships can be shown as follows:",[665,27826,27827,27841,27852],{},[148,27828,27829,27832,27833,27835,27838,27840],{},[169,27830,27831],{},"Countries and Capitals:"," The interrelation between Paris and France is analogous to the interrelation between Berlin and Germany. Subtracting France from Paris isolates the concept of a \"capital city\" concerning a country. The relation between the four words in the vector space can be presented as follows:",[2949,27834],{},[15804,27836,27837],{},"vec(Paris) − vec(France) ≈ vec(Berlin) − vec(Germany)",[2949,27839],{},"\nHence, you can say, \"Paris is to France as Berlin is to Germany\".",[148,27842,27843,27846,27847,27849],{},[169,27844,27845],{},"Verb Tenses:"," Verb Tenses can also be shown similarly. You can consider \"Walking is to Walked\" as \"Running is to Ran\". 
In the vector space, it is represented as follows:",[2949,27848],{},[15804,27850,27851],{},"Walking – Walked ≈ Running – Ran",[148,27853,27854,27857,27858,27860,27863,27865],{},[169,27855,27856],{},"Gender Analogy:"," Construct a relationship between the words 'king' and 'queen'. In the embedding space, it can be expressed as follows:",[2949,27859],{},[15804,27861,27862],{},"vec(king)−vec(man)+vec(woman)≈vec(queen)",[2949,27864],{},"\nIn the above expression, you can understand it as when you remove \"man\" from \"king\", it leaves a royal element, and when you add \"woman\", it becomes \"queen\".",[145,27867,27868,28030,28038],{},[148,27869,27870,27873],{},[15804,27871,27872],{},"Gender Analogy Suggestions code:",[3418,27874,27876],{"className":3420,"code":27875,"language":3422,"meta":23,"style":23},"import gensim.downloader as api\nmodel = api.load(\"word2vec-google-news-300\")\nresult = model.most_similar(positive=['king',  'woman'], negative=['man'])\nfor i in result:\n      print(f\"{i[0]:\u003C{20}}  
{i[1]:.6f}\")`\n",[3061,27877,27878,27892,27914,27966,27980],{"__ignoreMap":23},[1291,27879,27880,27882,27884,27886,27888,27890],{"class":3427,"line":3428},[1291,27881,3476],{"class":3475},[1291,27883,26969],{"class":3431},[1291,27885,694],{"class":3435},[1291,27887,26974],{"class":3457},[1291,27889,3506],{"class":3475},[1291,27891,26979],{"class":3431},[1291,27893,27894,27896,27898,27900,27902,27904,27906,27908,27910,27912],{"class":3427,"line":24},[1291,27895,26984],{"class":3431},[1291,27897,3738],{"class":3435},[1291,27899,26989],{"class":3431},[1291,27901,694],{"class":3435},[1291,27903,9884],{"class":3812},[1291,27905,3816],{"class":3435},[1291,27907,3691],{"class":3435},[1291,27909,27000],{"class":3439},[1291,27911,3691],{"class":3435},[1291,27913,3827],{"class":3435},[1291,27915,27916,27919,27921,27923,27925,27927,27929,27932,27934,27936,27939,27941,27943,27945,27948,27950,27952,27955,27957,27959,27962,27964],{"class":3427,"line":675},[1291,27917,27918],{"class":3431},"result ",[1291,27920,3738],{"class":3435},[1291,27922,27032],{"class":3431},[1291,27924,694],{"class":3435},[1291,27926,27037],{"class":3812},[1291,27928,3816],{"class":3435},[1291,27930,27931],{"class":3819},"positive",[1291,27933,19066],{"class":3435},[1291,27935,3436],{"class":3435},[1291,27937,27938],{"class":3439},"king",[1291,27940,3436],{"class":3435},[1291,27942,3566],{"class":3435},[1291,27944,22141],{"class":3435},[1291,27946,27947],{"class":3439},"woman",[1291,27949,3436],{"class":3435},[1291,27951,19075],{"class":3435},[1291,27953,27954],{"class":3819}," negative",[1291,27956,19066],{"class":3435},[1291,27958,3436],{"class":3435},[1291,27960,27961],{"class":3439},"man",[1291,27963,3436],{"class":3435},[1291,27965,9572],{"class":3435},[1291,27967,27968,27970,27973,27975,27978],{"class":3427,"line":3542},[1291,27969,24227],{"class":3475},[1291,27971,27972],{"class":3431}," i ",[1291,27974,9566],{"class":3475},[1291,27976,27977],{"class":3431}," 
result",[1291,27979,5243],{"class":3435},[1291,27981,27982,27985,27987,27989,27991,27993,27995,27997,27999,28001,28004,28007,28010,28012,28014,28016,28018,28021,28023,28025,28027],{"class":3427,"line":3547},[1291,27983,27984],{"class":3812},"      print",[1291,27986,3816],{"class":3435},[1291,27988,9643],{"class":7739},[1291,27990,3691],{"class":3439},[1291,27992,8770],{"class":3451},[1291,27994,25164],{"class":3812},[1291,27996,3688],{"class":3435},[1291,27998,9555],{"class":3451},[1291,28000,3699],{"class":3435},[1291,28002,28003],{"class":7739},":\u003C",[1291,28005,28006],{"class":3451},"{20}}",[1291,28008,28009],{"class":3451},"  {",[1291,28011,25164],{"class":3812},[1291,28013,3688],{"class":3435},[1291,28015,24626],{"class":3451},[1291,28017,3699],{"class":3435},[1291,28019,28020],{"class":7739},":.6f",[1291,28022,9671],{"class":3451},[1291,28024,3691],{"class":3439},[1291,28026,713],{"class":3435},[1291,28028,28029],{"class":3431},"`\n",[148,28031,28032,28034],{},[15804,28033,27766],{},[1141,28035],{":zoomable":1143,"alt":23,"className":28036,"sizes":16088,"src":28037},[23361],"\u002Fassets\u002Fcontent\u002Fblog\u002Ftext-embeddings-article\u002Foutput.png",[148,28039,28040,28043],{},[15804,28041,28042],{},"PCA Plots:",[1141,28044],{":zoomable":1143,"alt":23,"className":28045,"sizes":16088,"src":28046},[23361],"\u002Fassets\u002Fcontent\u002Fblog\u002Ftext-embeddings-article\u002Fpca-plot.png",[140,28048,28050],{"id":28049},"evaluating-models-during-production-using-rag","Evaluating models during production using RAG",[73,28052,28053,28054,28057,28058,28061],{},"The Internet is constantly flowing with an enormous amount of data, and our models need to keep up with it, constantly evolving according to the trends and information. One key technique to handle this is the ",[169,28055,28056],{},"Retrieval-Augmented-Generation (RAG)",". 
RAG is an AI framework that combines the strengths of traditional information retrieval systems (such as search and databases) with the capabilities of ",[169,28059,28060],{},"generative large language models (LLMs)",". The flow is as follows:",[145,28063,28064,28067,28070],{},[148,28065,28066],{},"The user first enters a query, which is transformed into its embeddings to capture its semantic meaning.",[148,28068,28069],{},"Documents are retrieved based on the embeddings, and then these documents, along with the original query, are fed into a generative model.",[148,28071,28072],{},"This helps to create a very contextual, efficient response, which is very much suited to the user.",[140,28074,28076],{"id":28075},"real-time-embedding-and-document-indexing-with-pathway-live-data-framework","Real-Time Embedding and Document Indexing with Pathway Live Data Framework",[73,28078,28079,28080,28084],{},"While techniques like RAG help keep models dynamically updated with the latest data, effective deployment in production environments often demands robust, scalable solutions to manage continuous data streams and evolving embedding spaces. This is where the ",[77,28081,28082],{"href":711},[169,28083,1279],{}," comes in.",[73,28086,28087],{},"The Pathway Live Data Framework is a high-throughput, low-latency framework designed for building and deploying RAG-powered AI applications at scale. It offers a cloud-agnostic, container-based approach with over 350 data source connectors. It integrates YAML, Python, and SQL for flexible configuration and supports 300+ data connectors, including S3, Delta Lake, Iceberg, Kafka, NATS, and SharePoint.",[73,28089,28090],{},"The following example demonstrates how the framework’s pipeline handles document ingestion, parsing, splitting, embedding, and indexing in real time. 
As soon as a new file is detected in the specified data source (here, a local folder with mode=\"streaming\"), the framework automatically re-runs the necessary steps to incorporate that file into the semantic index—no manual re-running of embeddings or re-indexing required.",[3189,28092,28094],{"id":28093},"setup-and-installations","Setup and Installations",[73,28096,28097],{},"If you are using Pathway Live Data Framework locally, you will need to install the Pathway Live Data Framework LLM xpack with:",[3418,28099,28101],{"className":6347,"code":28100,"language":6349,"meta":23,"style":23},"!pip install \"pathway[all]\"\n",[3061,28102,28103],{"__ignoreMap":23},[1291,28104,28105,28107,28109,28111,28113,28115],{"class":3427,"line":3428},[1291,28106,9175],{"class":3435},[1291,28108,6357],{"class":6356},[1291,28110,6360],{"class":3439},[1291,28112,3705],{"class":3435},[1291,28114,9236],{"class":3439},[1291,28116,3746],{"class":3435},[73,28118,28119],{},"Data Folder Setup: Creates a local directory named data\u002F where we will store and read our documents.",[3418,28121,28122],{"className":6347,"code":5169,"language":6349,"meta":23,"style":23},[3061,28123,28124],{"__ignoreMap":23},[1291,28125,28126,28128,28130,28132,28134,28136],{"class":3427,"line":3428},[1291,28127,9175],{"class":3435},[1291,28129,6409],{"class":6356},[1291,28131,6412],{"class":3439},[1291,28133,6415],{"class":3435},[1291,28135,5185],{"class":3439},[1291,28137,5188],{"class":3435},[3418,28139,28141],{"className":3420,"code":28140,"language":3422,"meta":23,"style":23},"import os\n\nimport json\nfrom typing import Iterable, Literal, List\nfrom pydantic import BaseModel, Field\n\n# needed for the OpenAI embedder and the LLM we will use below, you can change the embedding provider, see the documentation:\n# https:\u002F\u002Fpathway.com\u002Fdevelopers\u002Fapi-docs\u002Fpathway-xpacks-llm\u002Fembedders\nos.environ[\"OPENAI_API_KEY\"] = 
\"sk-\"\n",[3061,28142,28143,28149,28153,28160,28182,28199,28203,28208,28213],{"__ignoreMap":23},[1291,28144,28145,28147],{"class":3427,"line":3428},[1291,28146,3476],{"class":3475},[1291,28148,3486],{"class":3431},[1291,28150,28151],{"class":3427,"line":24},[1291,28152,3526],{"emptyLinePlaceholder":35},[1291,28154,28155,28157],{"class":3427,"line":675},[1291,28156,3476],{"class":3475},[1291,28158,28159],{"class":3431}," json\n",[1291,28161,28162,28164,28167,28169,28172,28174,28177,28179],{"class":3427,"line":3542},[1291,28163,3550],{"class":3475},[1291,28165,28166],{"class":3431}," typing ",[1291,28168,3476],{"class":3475},[1291,28170,28171],{"class":3431}," Iterable",[1291,28173,3566],{"class":3435},[1291,28175,28176],{"class":3431}," Literal",[1291,28178,3566],{"class":3435},[1291,28180,28181],{"class":3431}," List\n",[1291,28183,28184,28186,28189,28191,28194,28196],{"class":3427,"line":3547},[1291,28185,3550],{"class":3475},[1291,28187,28188],{"class":3431}," pydantic ",[1291,28190,3476],{"class":3475},[1291,28192,28193],{"class":3431}," BaseModel",[1291,28195,3566],{"class":3435},[1291,28197,28198],{"class":3431}," Field\n",[1291,28200,28201],{"class":3427,"line":3572},[1291,28202,3526],{"emptyLinePlaceholder":35},[1291,28204,28205],{"class":3427,"line":3614},[1291,28206,28207],{"class":3673},"# needed for the OpenAI embedder and the LLM we will use below, you can change the embedding provider, see the documentation:\n",[1291,28209,28210],{"class":3427,"line":3640},[1291,28211,28212],{"class":3673},"# 
https:\u002F\u002Fpathway.com\u002Fdevelopers\u002Fapi-docs\u002Fpathway-xpacks-llm\u002Fembedders\n",[1291,28214,28215,28217,28219,28221,28223,28225,28227,28229,28231,28233,28235,28238],{"class":3427,"line":3665},[1291,28216,3680],{"class":3431},[1291,28218,694],{"class":3435},[1291,28220,3685],{"class":3457},[1291,28222,3688],{"class":3435},[1291,28224,3691],{"class":3435},[1291,28226,5228],{"class":3439},[1291,28228,3691],{"class":3435},[1291,28230,3699],{"class":3435},[1291,28232,3702],{"class":3435},[1291,28234,3705],{"class":3435},[1291,28236,28237],{"class":3439},"sk-",[1291,28239,3746],{"class":3435},[3189,28241,28243],{"id":28242},"importing-a-sample-pdf-file","Importing a Sample PDF File",[3418,28245,28247],{"className":6347,"code":28246,"language":6349,"meta":23,"style":23},"!wget -q -P .\u002Fdata\u002F https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fllm-app\u002Fraw\u002Fmain\u002Ftemplates\u002Fquestion_answering_rag\u002Fdata\u002FIdeanomicsInc_20160330_10-K_EX-10.26_9512211_EX-10.26_Content%20License%20Agreement.pdf\n",[3061,28248,28249],{"__ignoreMap":23},[1291,28250,28251,28253,28255,28258,28261,28264],{"class":3427,"line":3428},[1291,28252,9175],{"class":3435},[1291,28254,7203],{"class":6356},[1291,28256,28257],{"class":3439}," -q",[1291,28259,28260],{"class":3439}," -P",[1291,28262,28263],{"class":3439}," .\u002Fdata\u002F",[1291,28265,28266],{"class":3439}," https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fllm-app\u002Fraw\u002Fmain\u002Ftemplates\u002Fquestion_answering_rag\u002Fdata\u002FIdeanomicsInc_20160330_10-K_EX-10.26_9512211_EX-10.26_Content%20License%20Agreement.pdf\n",[3189,28268,28270],{"id":28269},"pathway-imports-and-pipeline-setup","Pathway Imports and Pipeline Setup",[665,28272,28273,28279,28296,28325,28336],{},[148,28274,28275,28278],{},[169,28276,28277],{},"Imports",": We import Pathway’s standard libraries and LLM 
toolkits.",[148,28280,28281,28284,28285,19008,28287,694,28290,28292,28293,28295],{},[169,28282,28283],{},"Reading in Streaming Mode",": Notice the ",[3061,28286,20515],{},[3061,28288,28289],{},"pw.io.fs.read()",[2949,28291],{},"\nThis tells Pathway to watch the ",[3061,28294,3928],{}," folder for any file changes in real time.",[148,28297,28298,4390,28301],{},[169,28299,28300],{},"Document Processing",[145,28302,28303,28308,28314,28319],{},[148,28304,28305,28307],{},[169,28306,9058],{},": Converts raw document files into text.",[148,28309,28310,28313],{},[169,28311,28312],{},"Text Splitter",": Splits text into manageable chunks before embedding.",[148,28315,28316,28318],{},[169,28317,21833],{},": Uses the OpenAI embedding model to transform text chunks into numerical vectors.",[148,28320,28321,28324],{},[169,28322,28323],{},"Vector Index",": A brute-force KNN factory that indexes embeddings to allow fast semantic search.",[148,28326,28327,28329,28330,28332,28333,28335],{},[169,28328,10383],{},": Orchestrates the entire ingestion pipeline (reading, parsing, splitting, embedding, indexing).",[2949,28331],{},"\nWhenever new files appear in the ",[3061,28334,5185],{}," folder, DocumentStore automatically processes them and re-runs the embedding\u002Findexing pipeline in real time.",[148,28337,28338,28341],{},[169,28339,28340],{},"RAG App",": A simple RAG (Retrieval-Augmented Generation) solution is configured with a GPT-based LLM and a top-k search for retrieved chunks.",[3418,28343,28345],{"className":3420,"code":28344,"language":3422,"meta":23,"style":23},"import os\nimport getpass\nimport pathway as pw\n\nfrom pathway.stdlib.indexing import BruteForceKnnFactory\nfrom pathway.udfs import DiskCache\nfrom pathway.xpacks.llm import embedders, llms, parsers, splitters\nfrom pathway.xpacks.llm.document_store import DocumentStore\nfrom pathway.xpacks.llm.question_answering import BaseRAGQuestionAnswerer, RAGClient\nfrom pathway.xpacks.llm.servers import 
QASummaryRestServer\n\n\n# read the text files under the data folder, we can also read from Google Drive, Sharepoint, etc.\n# See connectors documentation: https:\u002F\u002Fpathway.com\u002Fdevelopers\u002Fuser-guide\u002Fconnect\u002Flive-data-framework-connectors to learn more\nfolder = pw.io.fs.read(\n    \".\u002Fdata\",\n    format=\"binary\",\n    with_metadata=True,\n    mode=\"streaming\"\n)\n\n# list of data sources to be indexed\nsources = [folder]\n\n# define the document processing steps\nparser = parsers.UnstructuredParser()\n\ntext_splitter = splitters.TokenCountSplitter(min_tokens=150, max_tokens=450)\n\nembedder = embedders.OpenAIEmbedder(cache_strategy=DiskCache())\n\nindex = BruteForceKnnFactory(embedder=embedder)\n\nllm = llms.OpenAIChat(model=\"gpt-4o\", cache_strategy=DiskCache())\n\ndocument_store = DocumentStore(\n    docs=sources, parser=parser, splitter=text_splitter, retriever_factory=index\n)\n\n# create the RAG app that will power the index, and serve the agent endpoint\nrag_app = BaseRAGQuestionAnswerer(\n    llm=llm,\n    indexer=document_store,\n    search_topk=8,  # number of retrieved chunks for 
RAG\n)\n",[3061,28346,28347,28353,28359,28369,28373,28392,28406,28436,28458,28484,28506,28510,28514,28518,28522,28544,28554,28568,28574,28586,28590,28594,28598,28610,28614,28618,28632,28636,28666,28670,28692,28696,28714,28718,28752,28756,28766,28798,28802,28806,28810,28820,28830,28840,28852],{"__ignoreMap":23},[1291,28348,28349,28351],{"class":3427,"line":3428},[1291,28350,3476],{"class":3475},[1291,28352,3486],{"class":3431},[1291,28354,28355,28357],{"class":3427,"line":24},[1291,28356,3476],{"class":3475},[1291,28358,5209],{"class":3431},[1291,28360,28361,28363,28365,28367],{"class":3427,"line":675},[1291,28362,3476],{"class":3475},[1291,28364,3533],{"class":3431},[1291,28366,3536],{"class":3475},[1291,28368,3539],{"class":3431},[1291,28370,28371],{"class":3427,"line":3542},[1291,28372,3526],{"emptyLinePlaceholder":35},[1291,28374,28375,28377,28379,28381,28383,28385,28387,28389],{"class":3427,"line":3547},[1291,28376,3550],{"class":3475},[1291,28378,3553],{"class":3431},[1291,28380,694],{"class":3435},[1291,28382,10510],{"class":3431},[1291,28384,694],{"class":3435},[1291,28386,10515],{"class":3431},[1291,28388,3476],{"class":3475},[1291,28390,28391],{"class":3431}," 
BruteForceKnnFactory\n",[1291,28393,28394,28396,28398,28400,28402,28404],{"class":3427,"line":3572},[1291,28395,3550],{"class":3475},[1291,28397,3553],{"class":3431},[1291,28399,694],{"class":3435},[1291,28401,3558],{"class":3431},[1291,28403,3476],{"class":3475},[1291,28405,10565],{"class":3431},[1291,28407,28408,28410,28412,28414,28416,28418,28420,28422,28424,28426,28428,28430,28432,28434],{"class":3427,"line":3614},[1291,28409,3550],{"class":3475},[1291,28411,3553],{"class":3431},[1291,28413,694],{"class":3435},[1291,28415,3581],{"class":3431},[1291,28417,694],{"class":3435},[1291,28419,3586],{"class":3431},[1291,28421,3476],{"class":3475},[1291,28423,3591],{"class":3431},[1291,28425,3566],{"class":3435},[1291,28427,3596],{"class":3431},[1291,28429,3566],{"class":3435},[1291,28431,3601],{"class":3431},[1291,28433,3566],{"class":3435},[1291,28435,3611],{"class":3431},[1291,28437,28438,28440,28442,28444,28446,28448,28450,28452,28454,28456],{"class":3427,"line":3640},[1291,28439,3550],{"class":3475},[1291,28441,3553],{"class":3431},[1291,28443,694],{"class":3435},[1291,28445,3581],{"class":3431},[1291,28447,694],{"class":3435},[1291,28449,3627],{"class":3431},[1291,28451,694],{"class":3435},[1291,28453,10614],{"class":3431},[1291,28455,3476],{"class":3475},[1291,28457,10619],{"class":3431},[1291,28459,28460,28462,28464,28466,28468,28470,28472,28474,28476,28478,28480,28482],{"class":3427,"line":3665},[1291,28461,3550],{"class":3475},[1291,28463,3553],{"class":3431},[1291,28465,694],{"class":3435},[1291,28467,3581],{"class":3431},[1291,28469,694],{"class":3435},[1291,28471,3627],{"class":3431},[1291,28473,694],{"class":3435},[1291,28475,3632],{"class":3431},[1291,28477,3476],{"class":3475},[1291,28479,4654],{"class":3431},[1291,28481,3566],{"class":3435},[1291,28483,4899],{"class":3431},[1291,28485,28486,28488,28490,28492,28494,28496,28498,28500,28502,28504],{"class":3427,"line":3670},[1291,28487,3550],{"class":3475},[1291,28489,3553],{"class":3431},[1291,28491,694],{
"class":3435},[1291,28493,3581],{"class":3431},[1291,28495,694],{"class":3435},[1291,28497,3627],{"class":3431},[1291,28499,694],{"class":3435},[1291,28501,10664],{"class":3431},[1291,28503,3476],{"class":3475},[1291,28505,10669],{"class":3431},[1291,28507,28508],{"class":3427,"line":3677},[1291,28509,3526],{"emptyLinePlaceholder":35},[1291,28511,28512],{"class":3427,"line":3877},[1291,28513,3526],{"emptyLinePlaceholder":35},[1291,28515,28516],{"class":3427,"line":3916},[1291,28517,10682],{"class":3673},[1291,28519,28520],{"class":3427,"line":4519},[1291,28521,10687],{"class":3673},[1291,28523,28524,28526,28528,28530,28532,28534,28536,28538,28540,28542],{"class":3427,"line":6038},[1291,28525,4068],{"class":3431},[1291,28527,3738],{"class":3435},[1291,28529,4073],{"class":3431},[1291,28531,694],{"class":3435},[1291,28533,4078],{"class":3457},[1291,28535,694],{"class":3435},[1291,28537,4083],{"class":3457},[1291,28539,694],{"class":3435},[1291,28541,4088],{"class":3812},[1291,28543,3874],{"class":3435},[1291,28545,28546,28548,28550,28552],{"class":3427,"line":6043},[1291,28547,4382],{"class":3435},[1291,28549,3928],{"class":3439},[1291,28551,3691],{"class":3435},[1291,28553,4107],{"class":3435},[1291,28555,28556,28558,28560,28562,28564,28566],{"class":3427,"line":6066},[1291,28557,4112],{"class":3819},[1291,28559,3738],{"class":3435},[1291,28561,3691],{"class":3435},[1291,28563,4119],{"class":3439},[1291,28565,3691],{"class":3435},[1291,28567,4107],{"class":3435},[1291,28569,28570,28572],{"class":3427,"line":6078},[1291,28571,4128],{"class":3819},[1291,28573,4131],{"class":3435},[1291,28575,28576,28578,28580,28582,28584],{"class":3427,"line":6089},[1291,28577,5431],{"class":3819},[1291,28579,3738],{"class":3435},[1291,28581,3691],{"class":3435},[1291,28583,5438],{"class":3439},[1291,28585,3746],{"class":3435},[1291,28587,28588],{"class":3427,"line":6124},[1291,28589,3827],{"class":3435},[1291,28591,28592],{"class":3427,"line":6133},[1291,28593,3526],{"emptyLinePlaceho
lder":35},[1291,28595,28596],{"class":3427,"line":6141},[1291,28597,10752],{"class":3673},[1291,28599,28600,28602,28604,28606,28608],{"class":3427,"line":6151},[1291,28601,4140],{"class":3431},[1291,28603,3738],{"class":3435},[1291,28605,4145],{"class":3435},[1291,28607,4148],{"class":3431},[1291,28609,5267],{"class":3435},[1291,28611,28612],{"class":3427,"line":6923},[1291,28613,3526],{"emptyLinePlaceholder":35},[1291,28615,28616],{"class":3427,"line":6928},[1291,28617,10773],{"class":3673},[1291,28619,28620,28622,28624,28626,28628,28630],{"class":3427,"line":6934},[1291,28621,4522],{"class":3431},[1291,28623,3738],{"class":3435},[1291,28625,3601],{"class":3431},[1291,28627,694],{"class":3435},[1291,28629,9184],{"class":3812},[1291,28631,4871],{"class":3435},[1291,28633,28634],{"class":3427,"line":6940},[1291,28635,3526],{"emptyLinePlaceholder":35},[1291,28637,28638,28640,28642,28644,28646,28648,28650,28652,28654,28656,28658,28660,28662,28664],{"class":3427,"line":6952},[1291,28639,5370],{"class":3431},[1291,28641,3738],{"class":3435},[1291,28643,10800],{"class":3431},[1291,28645,694],{"class":3435},[1291,28647,10805],{"class":3812},[1291,28649,3816],{"class":3435},[1291,28651,10810],{"class":3819},[1291,28653,3738],{"class":3435},[1291,28655,6802],{"class":3451},[1291,28657,3566],{"class":3435},[1291,28659,10819],{"class":3819},[1291,28661,3738],{"class":3435},[1291,28663,10824],{"class":3451},[1291,28665,3827],{"class":3435},[1291,28667,28668],{"class":3427,"line":6984},[1291,28669,3526],{"emptyLinePlaceholder":35},[1291,28671,28672,28674,28676,28678,28680,28682,28684,28686,28688,28690],{"class":3427,"line":7996},[1291,28673,4292],{"class":3431},[1291,28675,3738],{"class":3435},[1291,28677,3591],{"class":3431},[1291,28679,694],{"class":3435},[1291,28681,10843],{"class":3812},[1291,28683,3816],{"class":3435},[1291,28685,19820],{"class":3819},[1291,28687,3738],{"class":3435},[1291,28689,10855],{"class":3812},[1291,28691,6237],{"class":3435},[1291,28693,28694],{"cla
ss":3427,"line":8007},[1291,28695,3526],{"emptyLinePlaceholder":35},[1291,28697,28698,28700,28702,28704,28706,28708,28710,28712],{"class":3427,"line":8018},[1291,28699,10889],{"class":3431},[1291,28701,3738],{"class":3435},[1291,28703,10520],{"class":3812},[1291,28705,3816],{"class":3435},[1291,28707,4597],{"class":3819},[1291,28709,3738],{"class":3435},[1291,28711,4597],{"class":3812},[1291,28713,3827],{"class":3435},[1291,28715,28716],{"class":3427,"line":8029},[1291,28717,3526],{"emptyLinePlaceholder":35},[1291,28719,28720,28722,28724,28726,28728,28730,28732,28734,28736,28738,28740,28742,28744,28746,28748,28750],{"class":3427,"line":8040},[1291,28721,3586],{"class":3431},[1291,28723,3738],{"class":3435},[1291,28725,3596],{"class":3431},[1291,28727,694],{"class":3435},[1291,28729,10920],{"class":3812},[1291,28731,3816],{"class":3435},[1291,28733,10049],{"class":3819},[1291,28735,3738],{"class":3435},[1291,28737,3691],{"class":3435},[1291,28739,7849],{"class":3439},[1291,28741,3691],{"class":3435},[1291,28743,3566],{"class":3435},[1291,28745,10937],{"class":3819},[1291,28747,3738],{"class":3435},[1291,28749,10855],{"class":3812},[1291,28751,6237],{"class":3435},[1291,28753,28754],{"class":3427,"line":8051},[1291,28755,3526],{"emptyLinePlaceholder":35},[1291,28757,28758,28760,28762,28764],{"class":3427,"line":8057},[1291,28759,10614],{"class":3431},[1291,28761,3738],{"class":3435},[1291,28763,10956],{"class":3812},[1291,28765,3874],{"class":3435},[1291,28767,28768,28770,28772,28774,28776,28778,28780,28782,28784,28786,28788,28790,28792,28794,28796],{"class":3427,"line":8068},[1291,28769,10164],{"class":3819},[1291,28771,3738],{"class":3435},[1291,28773,4585],{"class":3812},[1291,28775,3566],{"class":3435},[1291,28777,9401],{"class":3819},[1291,28779,3738],{"class":3435},[1291,28781,4621],{"class":3812},[1291,28783,3566],{"class":3435},[1291,28785,5566],{"class":3819},[1291,28787,3738],{"class":3435},[1291,28789,10983],{"class":3812},[1291,28791,3566],{"class":3435},[
1291,28793,10988],{"class":3819},[1291,28795,3738],{"class":3435},[1291,28797,10993],{"class":3812},[1291,28799,28800],{"class":3427,"line":8079},[1291,28801,3827],{"class":3435},[1291,28803,28804],{"class":3427,"line":8090},[1291,28805,3526],{"emptyLinePlaceholder":35},[1291,28807,28808],{"class":3427,"line":8101},[1291,28809,11072],{"class":3673},[1291,28811,28812,28814,28816,28818],{"class":3427,"line":8112},[1291,28813,11077],{"class":3431},[1291,28815,3738],{"class":3435},[1291,28817,4654],{"class":3812},[1291,28819,3874],{"class":3435},[1291,28821,28822,28824,28826,28828],{"class":3427,"line":8117},[1291,28823,10105],{"class":3819},[1291,28825,3738],{"class":3435},[1291,28827,3627],{"class":3812},[1291,28829,4107],{"class":3435},[1291,28831,28832,28834,28836,28838],{"class":3427,"line":8128},[1291,28833,11098],{"class":3819},[1291,28835,3738],{"class":3435},[1291,28837,11103],{"class":3812},[1291,28839,4107],{"class":3435},[1291,28841,28842,28844,28846,28848,28850],{"class":3427,"line":8139},[1291,28843,11121],{"class":3819},[1291,28845,3738],{"class":3435},[1291,28847,11126],{"class":3451},[1291,28849,3566],{"class":3435},[1291,28851,11131],{"class":3673},[1291,28853,28854],{"class":3427,"line":8150},[1291,28855,3827],{"class":3435},[3189,28857,28859],{"id":28858},"starting-the-pathway-live-data-framework-rag-server","Starting the Pathway Live Data Framework RAG Server",[665,28861,28862,28868],{},[148,28863,28864,28867],{},[169,28865,28866],{},"RAG Server:"," We instantiate a REST server (QASummaryRestServer) that exposes endpoints for question answering.",[148,28869,28870,28873],{},[169,28871,28872],{},"Parallel Process:"," We run the server in a separate process, making it easy to handle incoming queries asynchronously.",[3418,28875,28877],{"className":3420,"code":28876,"language":3422,"meta":23,"style":23},"import multiprocessing\n\n# host and port of the RAG app\npathway_host: str = \"0.0.0.0\"\npathway_port: int = 8000\n\nserver = 
QASummaryRestServer(pathway_host, pathway_port, rag_app)\n\nserver_process = multiprocessing.Process(target=server.run, kwargs=dict(threaded=False))\n\nserver_process.start()\n",[3061,28878,28879,28885,28889,28893,28909,28921,28925,28947,28951,28989,28993],{"__ignoreMap":23},[1291,28880,28881,28883],{"class":3427,"line":3428},[1291,28882,3476],{"class":3475},[1291,28884,11151],{"class":3431},[1291,28886,28887],{"class":3427,"line":24},[1291,28888,3526],{"emptyLinePlaceholder":35},[1291,28890,28891],{"class":3427,"line":675},[1291,28892,11164],{"class":3673},[1291,28894,28895,28897,28899,28901,28903,28905,28907],{"class":3427,"line":3542},[1291,28896,11169],{"class":3431},[1291,28898,4390],{"class":3435},[1291,28900,9387],{"class":6356},[1291,28902,3702],{"class":3435},[1291,28904,3705],{"class":3435},[1291,28906,4738],{"class":3439},[1291,28908,3746],{"class":3435},[1291,28910,28911,28913,28915,28917,28919],{"class":3427,"line":3547},[1291,28912,11186],{"class":3431},[1291,28914,4390],{"class":3435},[1291,28916,11191],{"class":6356},[1291,28918,3702],{"class":3435},[1291,28920,4750],{"class":3451},[1291,28922,28923],{"class":3427,"line":3572},[1291,28924,3526],{"emptyLinePlaceholder":35},[1291,28926,28927,28929,28931,28933,28935,28937,28939,28941,28943,28945],{"class":3427,"line":3614},[1291,28928,5536],{"class":3431},[1291,28930,3738],{"class":3435},[1291,28932,11226],{"class":3812},[1291,28934,3816],{"class":3435},[1291,28936,11169],{"class":3812},[1291,28938,3566],{"class":3435},[1291,28940,11235],{"class":3812},[1291,28942,3566],{"class":3435},[1291,28944,11240],{"class":3812},[1291,28946,3827],{"class":3435},[1291,28948,28949],{"class":3427,"line":3640},[1291,28950,3526],{"emptyLinePlaceholder":35},[1291,28952,28953,28955,28957,28959,28961,28963,28965,28967,28969,28971,28973,28975,28977,28979,28981,28983,28985,28987],{"class":3427,"line":3665},[1291,28954,11251],{"class":3431},[1291,28956,3738],{"class":3435},[1291,28958,11256],{"class":3431},[1291,28960,694],{
"class":3435},[1291,28962,11261],{"class":3812},[1291,28964,3816],{"class":3435},[1291,28966,4813],{"class":3819},[1291,28968,3738],{"class":3435},[1291,28970,5580],{"class":3812},[1291,28972,694],{"class":3435},[1291,28974,11274],{"class":3457},[1291,28976,3566],{"class":3435},[1291,28978,11279],{"class":3819},[1291,28980,3738],{"class":3435},[1291,28982,11284],{"class":6356},[1291,28984,3816],{"class":3435},[1291,28986,11289],{"class":3819},[1291,28988,11292],{"class":3435},[1291,28990,28991],{"class":3427,"line":3670},[1291,28992,3526],{"emptyLinePlaceholder":35},[1291,28994,28995,28997,28999,29001],{"class":3427,"line":3677},[1291,28996,11305],{"class":3431},[1291,28998,694],{"class":3435},[1291,29000,4868],{"class":3812},[1291,29002,4871],{"class":3435},[3189,29004,29006],{"id":29005},"querying-the-existing-document","Querying the Existing Document",[665,29008,29009,29019,29025],{},[148,29010,29011,29014,29015,29018],{},[169,29012,29013],{},"Client Setup",": We create a ",[3061,29016,29017],{},"RAGClient"," to interface with our Pathway Live Data Framework RAG server.",[148,29020,29021,29024],{},[169,29022,29023],{},"Listing Documents",": We see which files are currently in the document store.",[148,29026,29027,29030],{},[169,29028,29029],{},"Question-Answering",": We then query the content of the newly ingested PDF. 
Pathway automatically used the parsed, split, and embedded chunks from that PDF to generate an answer.",[3418,29032,29034],{"className":3420,"code":29033,"language":3422,"meta":23,"style":23},"from pathway.xpacks.llm.question_answering import RAGClient\n\npathway_client = RAGClient(pathway_host, pathway_port)\npathway_client.pw_list_documents()\n",[3061,29035,29036,29058,29062,29080],{"__ignoreMap":23},[1291,29037,29038,29040,29042,29044,29046,29048,29050,29052,29054,29056],{"class":3427,"line":3428},[1291,29039,3550],{"class":3475},[1291,29041,3553],{"class":3431},[1291,29043,694],{"class":3435},[1291,29045,3581],{"class":3431},[1291,29047,694],{"class":3435},[1291,29049,3627],{"class":3431},[1291,29051,694],{"class":3435},[1291,29053,3632],{"class":3431},[1291,29055,3476],{"class":3475},[1291,29057,4899],{"class":3431},[1291,29059,29060],{"class":3427,"line":24},[1291,29061,3526],{"emptyLinePlaceholder":35},[1291,29063,29064,29066,29068,29070,29072,29074,29076,29078],{"class":3427,"line":675},[1291,29065,11353],{"class":3431},[1291,29067,3738],{"class":3435},[1291,29069,4918],{"class":3812},[1291,29071,3816],{"class":3435},[1291,29073,11169],{"class":3812},[1291,29075,3566],{"class":3435},[1291,29077,11235],{"class":3812},[1291,29079,3827],{"class":3435},[1291,29081,29082,29084,29086,29089],{"class":3427,"line":3542},[1291,29083,11372],{"class":3431},[1291,29085,694],{"class":3435},[1291,29087,29088],{"class":3812},"pw_list_documents",[1291,29090,4871],{"class":3435},[3418,29092,29094],{"className":3420,"code":29093,"language":3422,"meta":23,"style":23},"pathway_client.pw_ai_answer(\"What are the terms and conditions of the 
contract?\")\n",[3061,29095,29096],{"__ignoreMap":23},[1291,29097,29098,29100,29102,29105,29107,29109,29112,29114],{"class":3427,"line":3428},[1291,29099,11372],{"class":3431},[1291,29101,694],{"class":3435},[1291,29103,29104],{"class":3812},"pw_ai_answer",[1291,29106,3816],{"class":3435},[1291,29108,3691],{"class":3435},[1291,29110,29111],{"class":3439},"What are the terms and conditions of the contract?",[1291,29113,3691],{"class":3435},[1291,29115,3827],{"class":3435},[3189,29117,29119],{"id":29118},"ingesting-a-second-new-file","Ingesting a Second (New) File",[665,29121,29122,29128,29144,29149],{},[148,29123,29124,29127],{},[169,29125,29126],{},"Adding a New Document",": We add a second PDF to the same folder watched by Pathway.",[148,29129,29130,29133,29134,29136,29137],{},[169,29131,29132],{},"Automatic Re-indexing",": As soon as the file appears in ",[3061,29135,4102],{},", Pathway automatically triggers the pipeline:",[145,29138,29139],{},[148,29140,29141,694],{},[169,29142,29143],{},"Parsing → Splitting → Embedding → Indexing",[148,29145,29146,29148],{},[169,29147,29023],{},": You will see that both the original PDF and the newly added PDF show up.",[148,29150,29151,29153],{},[169,29152,29029],{},": We can now query specifically about the second file. 
Pathway fetches the relevant text chunks from the newly embedded content to generate an answer, all without needing to manually re-run the code.",[3418,29155,29157],{"className":6347,"code":29156,"language":6349,"meta":23,"style":23},"!wget -q -P .\u002Fdata\u002F https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fllm-app\u002Fraw\u002Fmain\u002Ftemplates\u002Fmultimodal_rag\u002Fdata\u002F20230203_alphabet_10K.pdf\n",[3061,29158,29159],{"__ignoreMap":23},[1291,29160,29161,29163,29165,29167,29169,29171],{"class":3427,"line":3428},[1291,29162,9175],{"class":3435},[1291,29164,7203],{"class":6356},[1291,29166,28257],{"class":3439},[1291,29168,28260],{"class":3439},[1291,29170,28263],{"class":3439},[1291,29172,29173],{"class":3439}," https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fllm-app\u002Fraw\u002Fmain\u002Ftemplates\u002Fmultimodal_rag\u002Fdata\u002F20230203_alphabet_10K.pdf\n",[3418,29175,29177],{"className":3420,"code":29176,"language":3422,"meta":23,"style":23},"pathway_client.pw_list_documents()\n",[3061,29178,29179],{"__ignoreMap":23},[1291,29180,29181,29183,29185,29187],{"class":3427,"line":3428},[1291,29182,11372],{"class":3431},[1291,29184,694],{"class":3435},[1291,29186,29088],{"class":3812},[1291,29188,4871],{"class":3435},[3418,29190,29192],{"className":3420,"code":29191,"language":3422,"meta":23,"style":23},"pathway_client.pw_ai_answer(\"What are the table of contents in 20230203_alphabet_10K.pdf?\")\n",[3061,29193,29194],{"__ignoreMap":23},[1291,29195,29196,29198,29200,29202,29204,29206,29209,29211],{"class":3427,"line":3428},[1291,29197,11372],{"class":3431},[1291,29199,694],{"class":3435},[1291,29201,29104],{"class":3812},[1291,29203,3816],{"class":3435},[1291,29205,3691],{"class":3435},[1291,29207,29208],{"class":3439},"What are the table of contents in 20230203_alphabet_10K.pdf?",[1291,29210,3691],{"class":3435},[1291,29212,3827],{"class":3435},[3189,29214,29216],{"id":29215},"scalability-and-advantages","Scalability and 
Advantages",[3206,29218,29220],{"id":29219},"real-time-ingestion-with-pathway-live-data-framework","Real-Time Ingestion with Pathway Live Data Framework",[145,29222,29223,29229],{},[148,29224,29225,29228],{},[169,29226,29227],{},"Automatic Updates",": Whenever a document is modified or a new file is added, the framework automatically reprocesses and re-embeds that data.",[148,29230,29231,29234],{},[169,29232,29233],{},"Consistency",": Your index always remains up to date without additional manual steps.",[3206,29236,29238],{"id":29237},"key-benefits","Key Benefits",[665,29240,29241,29247,29253,29259],{},[148,29242,29243,29246],{},[169,29244,29245],{},"Seamless Data Integration",": Connect to diverse data sources—file systems, Kafka, APIs, SharePoint, S3, PostgreSQL, Google Drive, and more.",[148,29248,29249,29252],{},[169,29250,29251],{},"Real-Time Indexing",": New files, content updates, and deletions are continuously processed.",[148,29254,29255,29258],{},[169,29256,29257],{},"Advanced Search",": Use semantic vectors, hybrid queries, and full-text search in-memory for fast retrieval.",[148,29260,29261,29264],{},[169,29262,29263],{},"No Extra Infrastructure",": Deploy Pathway with minimal overhead—no separate streaming or indexing clusters needed.",[140,29266,5008],{"id":5007},[73,29268,29269,29270],{},"Text embeddings are a way to represent words or textual documents as high-dimensional mathematical vectors. 
They transform words into numerical vectors which capture their meaning based on their ",[169,29271,29272],{},"context.",[73,29274,29275],{},"A walkthrough of this blog is as follows:",[145,29277,29278,29283,29288,29293,29299,29304],{},[148,29279,29280,29282],{},[15804,29281,23958],{},": vectorize words based on their count in the document or sample",[148,29284,29285,29287],{},[15804,29286,25109],{},": measures the frequency of a word occurring in the document while the inverse document frequency (IDF) measures the rarity of the words in the corpus",[148,29289,29290,29292],{},[15804,29291,23970],{},": learn word associations and place words with similar meanings close to each other in the vector space",[148,29294,29295,29298],{},[15804,29296,29297],{},"Continuous Bag Of Words (CBOW)",": picks a target word, and its surrounding words are the context",[148,29300,29301,29303],{},[15804,29302,26890],{},": target word is known and the model tries to guess the context using it",[148,29305,29306,29308],{},[15804,29307,23976],{},": constructs a co-occurrence matrix which is then used to capture the semantic relations between the words",[73,29310,29311],{},"Here's a quick comparison between the models based on their strengths, weaknesses and key use cases",[24540,29313,29315],{"className":29314},[24543,24544],[16104,29316,29317,29333],{},[16107,29318,29319],{},[16110,29320,29321,29324,29327,29330],{},[16113,29322,29323],{},"Model",[16113,29325,29326],{},"Strengths",[16113,29328,29329],{},"Weaknesses",[16113,29331,29332],{},"Key Use Cases ",[16162,29334,29335,29350,29365,29380],{},[16110,29336,29337,29341,29344,29347],{},[16167,29338,29339],{},[169,29340,23958],{},[16167,29342,29343],{},"- Implementation is simple and very effective with small datasets",[16167,29345,29346],{},"- Word order and context are ignored. - High-dimensional and sparse vectors if the vocabulary is large.",[16167,29348,29349],{},"- Text classification tasks with small datasets. - Basic sentiment 
analysis.",[16110,29351,29352,29356,29359,29362],{},[16167,29353,29354],{},[169,29355,23964],{},[16167,29357,29358],{},"- Weighs words by importance (frequency) within the document. - Reduces the impact of common and less informative words.",[16167,29360,29361],{},"- Ignores word order and semantic meaning. - High-dimensional vectors for large vocabularies.",[16167,29363,29364],{},"- Document retrieval and ranking. - Keyword extraction can be done.",[16110,29366,29367,29371,29374,29377],{},[16167,29368,29369],{},[169,29370,23970],{},[16167,29372,29373],{},"- Captures semantic meaning and relationships between words. - Dense, non-sparse vectors. - Effective for similarity and analogy tasks.",[16167,29375,29376],{},"- Training can be computationally heavy. - Requires large datasets for better quality.",[16167,29378,29379],{},"- Semantic search. - Better sentiment analysis. - Suggesting similar words.",[16110,29381,29382,29386,29389,29392],{},[16167,29383,29384],{},[169,29385,23976],{},[16167,29387,29388],{},"- Captures both global and local context effectively. - Dense, low-dimensional vectors. - Good for analogy and similarity tasks.",[16167,29390,29391],{},"- Computationally intensive to train. - Requires substantial preprocessing and large corpora for effectiveness.",[16167,29393,29394],{},"- Document classification. - Named entity recognition. - Similarity and analogy tasks.",[73,29396,29397],{},"Traditional techniques for representing words, such as Bag of Words, fail to capture the precise meaning of words and their relationships with other words. Word Embeddings overcome this issue as they can capture the semantic relationships between words and group them. For example, a search like 'running shoes' will also surface results for 'athletic footwear', ensuring better recognition of the meaning.\nIn this blog, you saw how the text embeddings capture the analogies using simple examples like Countries and Capitals, Verb Tenses, and Gender Analogy. 
Now you know better how Text Embeddings work in hidden, plain sight!",[3189,29399,29401],{"id":29400},"citations","Citations",[145,29403,29404,29411,29418,29425],{},[148,29405,29406],{},[77,29407,29410],{"href":29408,"rel":29409},"https:\u002F\u002Fwww.ibm.com\u002Ftopics\u002Fembedding",[81],"IBM's blog on embeddings",[148,29412,29413],{},[77,29414,29417],{"href":29415,"rel":29416},"https:\u002F\u002Fwww.turing.com\u002Fkb\u002Fguide-on-word-embeddings-in-nlp",[81],"Turing's blog on word embeddings",[148,29419,29420],{},[77,29421,29424],{"href":29422,"rel":29423},"https:\u002F\u002Fnlp.stanford.edu\u002Fprojects\u002Fglove\u002F",[81],"Stanford University's Global Vector Representations",[148,29426,29427],{},[77,29428,29431],{"href":29429,"rel":29430},"https:\u002F\u002Fgithub.com\u002Fpathwaycom\u002Fpathway",[81],"Pathway's Repositories",[3189,29433,29435],{"id":29434},"editors","Editors",[145,29437,29438,29444],{},[148,29439,29440,29441],{},"Saksham Goel ",[15804,29442,29443],{},"- DevRel Engineer at Pathway",[148,29445,29446,29447],{},"Shlok Srivastava ",[15804,29448,29449],{},"- Lead Engineer at Pine Labs",[22360,29451],{},[5019,29453,29454],{},"html pre.shiki code .s6cf3, html code.shiki .s6cf3{--shiki-default:#89DDFF;--shiki-default-font-style:italic}html pre.shiki code .s0W1g, html code.shiki .s0W1g{--shiki-default:#BABED8}html pre.shiki code .sAklC, html code.shiki .sAklC{--shiki-default:#89DDFF}html pre.shiki code .sfyAc, html code.shiki .sfyAc{--shiki-default:#C3E88D}html pre.shiki code .saEQR, html code.shiki .saEQR{--shiki-default:#676E95;--shiki-default-font-style:italic}html pre.shiki code .sdLwU, html code.shiki .sdLwU{--shiki-default:#82AAFF}html pre.shiki code .s5Dmg, html code.shiki .s5Dmg{--shiki-default:#FFCB6B}html pre.shiki code .s7ZW3, html code.shiki .s7ZW3{--shiki-default:#BABED8;--shiki-default-font-style:italic}html pre.shiki code .sx098, html code.shiki .sx098{--shiki-default:#F78C6C}html .default .shiki span {color: 
var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html pre.shiki code .s-wAU, html code.shiki .s-wAU{--shiki-default:#F07178}html pre.shiki code .sJ14y, html code.shiki .sJ14y{--shiki-default:#C792EA}",{"title":23,"searchDepth":24,"depth":24,"links":29456},[29457,29458,29459,29460,29461,29462,29463,29464,29467,29468,29471,29472,29476,29477,29486],{"id":23901,"depth":24,"text":23902},{"id":23933,"depth":24,"text":23934},{"id":23947,"depth":24,"text":23948},{"id":23983,"depth":24,"text":23984},{"id":25108,"depth":24,"text":25109},{"id":26862,"depth":24,"text":26863},{"id":26879,"depth":24,"text":23970},{"id":26896,"depth":24,"text":26897,"children":29465},[29466],{"id":26956,"depth":675,"text":26957},{"id":27275,"depth":24,"text":27276},{"id":27292,"depth":24,"text":27293,"children":29469},[29470],{"id":27385,"depth":675,"text":27386},{"id":27773,"depth":24,"text":27774},{"id":27780,"depth":24,"text":27781,"children":29473},[29474,29475],{"id":27784,"depth":675,"text":27785},{"id":27798,"depth":675,"text":27799},{"id":28049,"depth":24,"text":28050},{"id":28075,"depth":24,"text":28076,"children":29478},[29479,29480,29481,29482,29483,29484,29485],{"id":28093,"depth":675,"text":28094},{"id":28242,"depth":675,"text":28243},{"id":28269,"depth":675,"text":28270},{"id":28858,"depth":675,"text":28859},{"id":29005,"depth":675,"text":29006},{"id":29118,"depth":675,"text":29119},{"id":29215,"depth":675,"text":29216},{"id":5007,"depth":24,"text":5008,"children":29487},[29488,29489],{"id":29400,"depth":675,"text":29401},{"id":29434,"depth":675,"text":29435},"The world is filled with fascinating technology. 
It can feel overwhelming to see such extravagant machines and systems at work, yet it is easy to overlook the intricate engineering powering our most routine tasks. Consider, for example, the smartphone you rely on every day. We often use it mindlessly—scrolling through social media, checking emails, or chatting with friends—without appreciating the sophisticated processes working behind the scenes",{"layout":90,"date":23028,"thumbnail":29492,"tags":29494,"coauthors":29495},{"src":29493,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fpathway-text-embeddings-th.png",[17783],[29496],{"name":29497,"description":23891,"img":29498,"provider":11,"linkedin":29499},"Yashasvee Taiwade","\u002Fassets\u002Fblog\u002Favatars\u002Fyashasvee-avatar.png","https:\u002F\u002Fwww.linkedin.com\u002Fin\u002Fyashasvee-taiwade-84826a337\u002F","\u002Fframework\u002Fblog\u002Fhow-text-embeddings-help-suggest-similar-words",{"title":23888,"description":29490},{"loc":29500},"framework\u002Fblog\u002F881.how-text-embeddings-help-suggest-similar-words","5zafS9pud9F047TzxqNU2xX34z01x9WG1z76qFOqmkM",{"id":29506,"title":29507,"author":29508,"body":29509,"description":23,"extension":27,"meta":29561,"navigation":35,"path":29565,"seo":29566,"sitemap":29567,"stem":29568,"__hash__":29569},"content\u002Fframework\u002Fblog\u002F882.pathway-predictions-2025.md","2025 PATHWAY CEO PREDICTIONS",{"id":312,"url":313,"name":314,"description":315,"img":316,"provider":11,"linkedin":317},{"type":13,"value":29510,"toc":29552},[29511,29514,29518,29522,29525,29528,29532,29535,29539,29543,29546,29549],[68,29512,29507],{"id":29513},"_2025-pathway-ceo-predictions",[140,29515,29517],{"id":29516},"ai-predictions-for-2025","AI Predictions for 2025",[3189,29519,29521],{"id":29520},"enterprises-need-to-nail-data-management-in-complex-environments-to-move-from-pilot-to-production","Enterprises need to nail data management in complex environments to move from pilot to production",[73,29523,29524],{},"2025 will 
be the year that enterprise artificial intelligence (AI) moves from proof of concept (POC) to production, but it is becoming increasingly clear that the road to perfect solutions is going to be longer than anticipated. Last year, businesses allocated budgets and resources for large language model (LLM) experimentation, but next year this will become more closely scrutinized and those responsible for deployment will be under more pressure to prove a return on investment. Yet, a challenge presents itself in that limitations in the model aren’t always clear at the prototype stage.",[73,29526,29527],{},"According to Gartner, the most pressing of these issues is data management, along with privacy and data updates. Organizations must feed their enterprise data into LLMs to be truly beneficial for business. The constantly and rapidly changing environment in which organizations operate presents a data management challenge. Models need to have the ability to contextualize, index and retrieve relevant complex data at operational speed. Because of this, we will see LiveAI™ become more prominent as organizations look at better approaches to take LLMs into production. This will not only overcome the common limitations around data management, but will allow businesses to deal with the freshness of both structured and unstructured data in a smarter way.",[3189,29529,29531],{"id":29530},"o1-will-disrupt-the-market-but-full-realization-might-not-come-until-2026","o1 will disrupt the market, but full realization might not come until 2026",[73,29533,29534],{},"The rise of OpenAI’s o1, with its enhanced reasoning, mathematics and fact-checking capabilities, is going to disrupt the world of AI. It will fulfill the demand for a model with the capacity to think deeply and solve advanced problems. This will open new areas of applications and disrupt the AI market. 
I predict that the full extent of the shake-up o1 is going to create won’t be realized next year, although there will be an exciting race as other players in the space compete to keep up with the technology. That being said, o1, with its slow outputs and cost barriers, isn’t the end game for enhanced reasoning for LLMs. Many organizations are still looking at how they can close the gap in their own data while maintaining data management and privacy standards. Overcoming this is something I anticipate we’ll see more research into next year, but again, we might see the bigger outcomes coming in 2026.",[140,29536,29538],{"id":29537},"_2025-predictions-for-start-up-founders","2025 Predictions for Start-up Founders",[3189,29540,29542],{"id":29541},"ai-founders-need-to-outsmart-the-market-to-remain-defensible-to-investors","AI founders need to outsmart the market to remain defensible to investors",[73,29544,29545],{},"The AI market is changing quickly, which presents both a challenge and an opportunity for founders. Those who can predict where AI is going can catch early opportunities and outsmart the market. The rise of o1 from OpenAI, for example, is going to catalyze one of the biggest shifts in AI since its inception. The deeper reasoning capacity of the model is exciting, especially as competitors don’t have anything that compares at present. It will be interesting to see how the startup scene grasps this opportunity.",[73,29547,29548],{},"Concurrently, we will see companies disappear. Investors have made it clear that they are not looking for more of the same and need to see proposals which are strongly differentiated and defensible.",[73,29550,29551],{},"2025 is poised to be a pivotal year for enterprise AI, marking a shift from experimental phases to real-world applications. This transition hinges on effectively addressing data management challenges in complex environments. 
The ability to seamlessly integrate and manage data will be crucial for enterprises to harness the true potential of AI. OpenAI's o1 is expected to disrupt the AI landscape, offering advanced reasoning capabilities that surpass existing models. While the full impact of o1 might not be fully realized until 2026, its emergence will likely spark a competitive race among other players in the AI space. Notably, start-up founders will need to demonstrate innovation and defensibility to attract investors in a rapidly evolving market. Identifying and capitalizing on emerging trends, such as those spurred by o1, will be key to success.",{"title":23,"searchDepth":24,"depth":24,"links":29553},[29554,29558],{"id":29516,"depth":24,"text":29517,"children":29555},[29556,29557],{"id":29520,"depth":675,"text":29521},{"id":29530,"depth":675,"text":29531},{"id":29537,"depth":24,"text":29538,"children":29559},[29560],{"id":29541,"depth":675,"text":29542},{"layout":90,"date":367,"thumbnail":29562,"tags":29564,"hidden":35},{"src":29563,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fpathway-2025-predictions-th.png",[90],"\u002Fframework\u002Fblog\u002Fpathway-predictions-2025",{"title":29507,"description":23},{"loc":29565},"framework\u002Fblog\u002F882.pathway-predictions-2025","mGO8mOElUoBKEbYBcynptaOlpWnUXNLk3jOHiSpjkJs",{"id":29571,"title":29572,"author":29573,"body":29575,"description":35076,"extension":27,"meta":35077,"navigation":35,"path":35084,"seo":35085,"sitemap":35086,"stem":35087,"__hash__":35088},"content\u002Fframework\u002Fblog\u002F883.build-real-time-systems-nats-pathway-alternative-kafka-flink.md","Scalable Alternative to Apache Kafka and Flink for Advanced Streaming: Build Real-Time Systems with NATS and 
Pathway",{"name":22721,"description":22722,"img":29574,"linkedin":22724},"\u002Fassets\u002Fcontent\u002Fblog\u002Favatars\u002Fshlok-srivastava-av.jpg",{"type":13,"value":29576,"toc":35033},[29577,29581,29584,29587,29608,29612,29615,29619,29634,29638,29654,29658,29665,29740,29744,29748,29751,29770,29776,29783,29787,29798,29812,29816,29819,29826,29837,29856,29863,29866,29882,29889,29900,29933,29940,29957,30011,30018,30036,30043,30050,30070,30077,30085,30245,30249,30252,30255,30261,30286,30293,30313,30319,30347,30354,30420,30431,30438,30492,30501,30508,30541,30544,30551,30693,30699,30705,30729,30736,31109,31113,31122,31125,31130,31148,31153,31165,31168,31216,31222,31226,31230,31236,31249,31253,31256,31260,31263,31305,31309,31334,31341,31345,31348,31355,31370,31377,31453,31460,31467,31510,31513,31520,31612,31618,31625,31640,31643,31647,31872,31878,31882,31887,31899,31904,31921,31926,31936,31941,31985,31989,31993,31996,31999,32002,32006,32009,32014,32017,32021,32025,32035,32039,32042,32055,32059,32068,32072,32078,32082,32085,32093,32096,32100,32103,32106,32113,32155,32162,32178,32184,32212,32219,32268,32275,32286,32293,32519,32530,32575,32578,32585,32606,32612,32632,32636,33050,33054,33062,33065,33076,33079,33082,33088,33102,33109,33193,33196,33204,33287,33294,33301,33457,33464,33479,33597,33604,33618,33672,33679,33682,33756,33762,33776,33780,34457,34461,34467,34473,34477,34748,34752,34755,34758,34766,34778,34790,34800,34807,34817,34820,34897,34899,34902,34905,34909,34940,34944,34964,34967,34971,35030],[68,29578,29580],{"id":29579},"build-real-time-systems-with-nats-and-pathway-scalable-alternative-to-apache-kafka-and-flink-for-advanced-streaming","Build Real-Time Systems with NATS and Pathway: Scalable Alternative to Apache Kafka and Flink for Advanced Streaming",[73,29582,29583],{},"Real-time data processing is crucial for businesses to make swift, informed decisions. 
Whether it's monitoring IoT devices, analyzing financial transactions, or providing instant user feedback, real-time systems form the backbone of modern applications.",[73,29585,29586],{},"In this blog post, you’ll learn how to build robust real-time systems without using Kafka or Flink.",[73,29588,29589,29590,29593,29594,29596,29597,3126,29599,29601,29602,3126,29605,694],{},"You’ll be using ",[15804,29591,29592],{},"NATS",", a high-performance messaging system, and ",[15804,29595,1279],{},", a powerful batch and stream processing framework. Building such systems can be complex, but powerful tools like ",[15804,29598,29592],{},[15804,29600,1279],{}," are making it easier than ever. This blog introduces these technologies, which are well-adopted alternatives to Apache Kafka and Apache Flink, respectively. This blog also provides practical code examples, and walks through a use case of ",[169,29603,29604],{},"Real Time Fleet Monitoring",[169,29606,29607],{},"Predictive Maintenance",[140,29609,29611],{"id":29610},"why-consider-alternatives-to-apache-kafka-and-a-flink-alternative","Why Consider Alternatives to Apache Kafka and a Flink Alternative?",[73,29613,29614],{},"As your distributed systems grow, you may find yourself looking for alternatives to Apache Kafka or alternatives to Flink. These platforms, while powerful, introduce unnecessary complexity, inconsistent performance, and steep costs—up to $20,000 per month for minimal Kafka deployments, plus thousands more for managed services like Confluent Cloud. 
In contrast, a streaming pipeline using NATS and Pathway can address several of these issues.",[3189,29616,29618],{"id":29617},"nats-a-simpler-more-efficient-alternative-to-apache-kafka","NATS: A Simpler, More Efficient Alternative to Apache Kafka",[145,29620,29621,29628,29631],{},[148,29622,29623,29627],{},[77,29624,29592],{"href":29625,"rel":29626},"https:\u002F\u002Fnats.io\u002F",[81]," is an open-source, lightweight messaging system designed for cloud-native applications, IoT messaging, and microservices architectures.",[148,29629,29630],{},"It provides a simple yet powerful publish\u002Fsubscribe model for asynchronous communication between distributed systems.",[148,29632,29633],{},"As an alternative to Apache Kafka, NATS delivers lower overhead and simpler operations while maintaining high throughput and resilience.",[3189,29635,29637],{"id":29636},"pathway-live-data-framework-the-leading-flink-alternative-for-advanced-real-time-analytics","Pathway Live Data Framework: the Leading Flink Alternative for Advanced Real-Time Analytics",[145,29639,29640,29645,29648,29651],{},[148,29641,29642,29644],{},[77,29643,1279],{"href":711}," is an advanced stream-processing framework tailored for real-time data analytics.",[148,29646,29647],{},"It simplifies building data pipelines for ingesting, processing, and analyzing data streams, allowing developers to focus on business logic rather than infrastructure.",[148,29649,29650],{},"For teams seeking a Flink alternative, Pathway Live Data Framework is not only easier to learn and use, but also supports real-time machine learning, dynamic graph algorithms, and advanced data transformations—features that make it a more powerful option than traditional Flink implementations.",[148,29652,29653],{},"If you’re evaluating alternatives to Apache Kafka for high-performance messaging, or seeking a Flink alternative to handle advanced streaming analytics, this guide will show you how NATS and Pathway Live Data Framework fit the 
bill",[140,29655,29657],{"id":29656},"basic-terminologies","Basic Terminologies",[73,29659,29660,29661,29664],{},"Before diving deeper into the implementation, here’s a quick glossary of terms and concepts used in this tutorial. ",[15804,29662,29663],{},"If you're already familiar with message brokers, feel free to skip this section and proceed to \"Getting Started with NATS\"",". Most of these terms are also briefly explained where needed as you follow through this tutorial.",[145,29666,29667,29676,29682,29688,29694,29699,29705,29710,29716,29722,29728,29734],{},[148,29668,29669,29672,29673,694],{},[169,29670,29671],{},"Publisher",": In a pub\u002Fsub system, the component responsible for sending (or publishing) messages to a particular ",[169,29674,29675],{},"subject",[148,29677,29678,29681],{},[169,29679,29680],{},"Subscriber",": The component that listens to (or subscribes to) messages from a particular subject. It acts upon the messages it receives in real time.",[148,29683,29684,29687],{},[169,29685,29686],{},"Subject",": In NATS, a lightweight mechanism to categorize messages. Publishers send messages to subjects, and subscribers receive messages by subscribing to specific subjects.",[148,29689,29690,29693],{},[169,29691,29692],{},"Telemetry Data",": Sensor data collected from IoT devices or systems. For instance, in the fleet monitoring example, telemetry data includes vehicle location, engine temperature, and fuel levels.",[148,29695,29696,29698],{},[169,29697,219],{},": The process of identifying unusual patterns or critical conditions in data that deviate from normal behavior. For example, detecting high engine temperatures or low fuel levels in vehicle telemetry.",[148,29700,29701,29704],{},[169,29702,29703],{},"Message Broker",": A system or tool like NATS that facilitates message exchange between publishers and subscribers. 
It ensures reliable communication in distributed systems.",[148,29706,29707,29709],{},[169,29708,16368],{},": A defined structure for data. For example, a schema in Pathway specifies the fields and data types expected in telemetry data.",[148,29711,29712,29715],{},[169,29713,29714],{},"JSON (JavaScript Object Notation)",": A lightweight, text-based format for structuring data. It is commonly used for sending and receiving structured data in APIs and messaging systems.",[148,29717,29718,29721],{},[169,29719,29720],{},"AsyncIO",": A Python library that supports asynchronous programming. It enables efficient handling of I\u002FO-bound and high-level structured network code, such as the publisher and subscriber implementations.",[148,29723,29724,29727],{},[169,29725,29726],{},"Alerting System",": A system or process that notifies stakeholders about critical conditions or anomalies. In this tutorial, alerts are generated for anomalies in telemetry data and logged in real time.",[148,29729,29730,29733],{},[169,29731,29732],{},"Real-Time Processing",": The ability to process data as it arrives, enabling immediate analysis and response. Systems like NATS and Pathway are optimized for real-time data handling.",[148,29735,29736,29739],{},[169,29737,29738],{},"Connector",": A mechanism to integrate different systems. For example, the framework’s NATS connectors allow seamless communication between NATS and Pathway Live Data Framework for real-time data ingestion and processing.",[140,29741,29743],{"id":29742},"getting-started-with-nats","Getting Started with NATS",[3189,29745,29747],{"id":29746},"installing-nats","Installing NATS",[73,29749,29750],{},"Since this tutorial uses NATS, you need to install it first. The easiest way to run it locally is via Docker. 
Use the command below to start it:",[3418,29752,29754],{"className":6347,"code":29753,"language":6349,"meta":23,"style":23},"docker run -p 4222:4222 nats:latest\n",[3061,29755,29756],{"__ignoreMap":23},[1291,29757,29758,29760,29762,29764,29767],{"class":3427,"line":3428},[1291,29759,7331],{"class":6356},[1291,29761,17106],{"class":3439},[1291,29763,6412],{"class":3439},[1291,29765,29766],{"class":3439}," 4222:4222",[1291,29768,29769],{"class":3439}," nats:latest\n",[73,29771,29772,29773,694],{},"This command pulls and runs the latest NATS Docker image, exposing the default port ",[3061,29774,29775],{},"4222",[73,29777,29778,29779],{},"There are multiple other ways to install and run NATS. Especially on a production server, this might not be the most efficient way to install and run NATS. For multiple other ways to get started with NATS, you can refer to the official NATS documentation here: ",[77,29780,29781],{"href":29781,"rel":29782},"https:\u002F\u002Fdocs.nats.io\u002Frunning-a-nats-service\u002Fintroduction\u002Finstallation",[81],[3189,29784,29786],{"id":29785},"installing-the-nats-python-client","Installing the NATS Python Client",[73,29788,29789,29790,29795,29796,4390],{},"Having the NATS server up and running, you can now proceed using it for messaging. To interact with NATS using Python, you can make use of the ",[77,29791,29794],{"href":29792,"rel":29793},"https:\u002F\u002Fgithub.com\u002Fnats-io\u002Fnats.py",[81],"nats-py"," library. It provides a simple interface for connecting and communicating with a NATS server. 
Install it using ",[3061,29797,6357],{},[3418,29799,29801],{"className":6347,"code":29800,"language":6349,"meta":23,"style":23},"pip install nats-py\n",[3061,29802,29803],{"__ignoreMap":23},[1291,29804,29805,29807,29809],{"class":3427,"line":3428},[1291,29806,6357],{"class":6356},[1291,29808,6360],{"class":3439},[1291,29810,29811],{"class":3439}," nats-py\n",[140,29813,29815],{"id":29814},"creating-a-publisher","Creating a Publisher",[73,29817,29818],{},"Now that the NATS server is running and the NATS client is also installed, you can follow the steps below to create a simple publisher - a program sending messages to a NATS subject.",[665,29820,29821],{},[148,29822,29823,4390],{},[169,29824,29825],{},"Import the necessary modules",[73,29827,29828,29829,29832,29833,29836],{},"You need ",[3061,29830,29831],{},"asyncio"," for asynchronous programming and ",[3061,29834,29835],{},"nats"," to interact with the NATS server.",[3418,29838,29840],{"className":3420,"code":29839,"language":3422,"meta":23,"style":23},"import asyncio\nimport nats\n",[3061,29841,29842,29849],{"__ignoreMap":23},[1291,29843,29844,29846],{"class":3427,"line":3428},[1291,29845,3476],{"class":3475},[1291,29847,29848],{"class":3431}," asyncio\n",[1291,29850,29851,29853],{"class":3427,"line":24},[1291,29852,3476],{"class":3475},[1291,29854,29855],{"class":3431}," nats\n",[665,29857,29858],{"start":24},[148,29859,29860,4390],{},[169,29861,29862],{},"Define an asynchronous function to publish a message",[73,29864,29865],{},"This function handles the publishing process.",[3418,29867,29869],{"className":3420,"code":29868,"language":3422,"meta":23,"style":23},"async def publish_message():\n",[3061,29870,29871],{"__ignoreMap":23},[1291,29872,29873,29875,29877,29880],{"class":3427,"line":3428},[1291,29874,9369],{"class":7739},[1291,29876,9372],{"class":7739},[1291,29878,29879],{"class":3812}," 
publish_message",[1291,29881,27496],{"class":3435},[665,29883,29884],{"start":675},[148,29885,29886,4390],{},[169,29887,29888],{},"Connect to the NATS server",[73,29890,29891,29892,29895,29896,29899],{},"You can use ",[3061,29893,29894],{},"nats.connect()"," to establish a connection to the NATS server running locally. The connection string ",[3061,29897,29898],{},"\"nats:\u002F\u002Flocalhost:4222\""," specifies the server's address and port.",[3418,29901,29903],{"className":3420,"code":29902,"language":3422,"meta":23,"style":23},"nc = await nats.connect(\"nats:\u002F\u002Flocalhost:4222\")\n",[3061,29904,29905],{"__ignoreMap":23},[1291,29906,29907,29910,29912,29914,29917,29919,29922,29924,29926,29929,29931],{"class":3427,"line":3428},[1291,29908,29909],{"class":3431},"nc ",[1291,29911,3738],{"class":3435},[1291,29913,9511],{"class":3475},[1291,29915,29916],{"class":3431}," nats",[1291,29918,694],{"class":3435},[1291,29920,29921],{"class":3812},"connect",[1291,29923,3816],{"class":3435},[1291,29925,3691],{"class":3435},[1291,29927,29928],{"class":3439},"nats:\u002F\u002Flocalhost:4222",[1291,29930,3691],{"class":3435},[1291,29932,3827],{"class":3435},[665,29934,29935],{"start":3542},[148,29936,29937,4390],{},[169,29938,29939],{},"Publish a message to a subject",[73,29941,29942,29945,29946,29949,29950,29953,29954,29956],{},[3061,29943,29944],{},"nc.publish(\"updates\", b\"Hello, NATS!\")"," sends the message ",[3061,29947,29948],{},"\"Hello, NATS!\""," to the subject ",[3061,29951,29952],{},"updates",". 
The message is prefixed with ",[3061,29955,2944],{}," to indicate that it's a byte string, which is required by NATS.",[3418,29958,29960],{"className":3420,"code":29959,"language":3422,"meta":23,"style":23},"await nc.publish(\"updates\", b\"Hello, NATS!\")\nprint(\"Message sent: Hello, NATS!\")\n",[3061,29961,29962,29996],{"__ignoreMap":23},[1291,29963,29964,29966,29969,29971,29974,29976,29978,29980,29982,29984,29987,29989,29992,29994],{"class":3427,"line":3428},[1291,29965,9782],{"class":3475},[1291,29967,29968],{"class":3431}," nc",[1291,29970,694],{"class":3435},[1291,29972,29973],{"class":3812},"publish",[1291,29975,3816],{"class":3435},[1291,29977,3691],{"class":3435},[1291,29979,29952],{"class":3439},[1291,29981,3691],{"class":3435},[1291,29983,3566],{"class":3435},[1291,29985,29986],{"class":7739}," b",[1291,29988,3691],{"class":3435},[1291,29990,29991],{"class":3439},"Hello, NATS!",[1291,29993,3691],{"class":3435},[1291,29995,3827],{"class":3435},[1291,29997,29998,30000,30002,30004,30007,30009],{"class":3427,"line":24},[1291,29999,4986],{"class":3812},[1291,30001,3816],{"class":3435},[1291,30003,3691],{"class":3435},[1291,30005,30006],{"class":3439},"Message sent: Hello, NATS!",[1291,30008,3691],{"class":3435},[1291,30010,3827],{"class":3435},[665,30012,30013],{"start":3547},[148,30014,30015,4390],{},[169,30016,30017],{},"Close the connection",[3418,30019,30021],{"className":3420,"code":30020,"language":3422,"meta":23,"style":23},"await nc.close()\n",[3061,30022,30023],{"__ignoreMap":23},[1291,30024,30025,30027,30029,30031,30034],{"class":3427,"line":3428},[1291,30026,9782],{"class":3475},[1291,30028,29968],{"class":3431},[1291,30030,694],{"class":3435},[1291,30032,30033],{"class":3812},"close",[1291,30035,4871],{"class":3435},[30037,30038,30040],"alert",{"icon":135,"type":30039},"success",[73,30041,30042],{},"It's good practice to close the connection when it's no longer 
needed.",[665,30044,30045],{"start":3572},[148,30046,30047,4390],{},[169,30048,30049],{},"Run the asynchronous function",[3418,30051,30053],{"className":3420,"code":30052,"language":3422,"meta":23,"style":23},"asyncio.run(publish_message())\n",[3061,30054,30055],{"__ignoreMap":23},[1291,30056,30057,30059,30061,30063,30065,30068],{"class":3427,"line":3428},[1291,30058,29831],{"class":3431},[1291,30060,694],{"class":3435},[1291,30062,11274],{"class":3812},[1291,30064,3816],{"class":3435},[1291,30066,30067],{"class":3812},"publish_message",[1291,30069,6237],{"class":3435},[73,30071,30072,30073,30076],{},"This starts the event loop and runs the ",[3061,30074,30075],{},"publish_message()"," function.",[3189,30078,30080,30081,30084],{"id":30079},"complete-publisher-code-publisherpy","Complete Publisher Code (",[3061,30082,30083],{},"publisher.py","):",[3418,30086,30088],{"className":3420,"code":30087,"language":3422,"meta":23,"style":23},"import asyncio\nimport nats\n\nasync def publish_message():\n    # Connect to the NATS server\n    nc = await nats.connect(\"nats:\u002F\u002Flocalhost:4222\")\n    \n    # Publish a message to the 'updates' subject\n    await nc.publish(\"updates\", b\"Hello, NATS!\")\n    print(\"Message sent: Hello, NATS!\")\n    \n    # Close the connection\n    await nc.close()\n\n# Run the asynchronous 
function\nasyncio.run(publish_message())\n",[3061,30089,30090,30096,30102,30106,30116,30121,30146,30151,30156,30187,30201,30205,30210,30222,30226,30231],{"__ignoreMap":23},[1291,30091,30092,30094],{"class":3427,"line":3428},[1291,30093,3476],{"class":3475},[1291,30095,29848],{"class":3431},[1291,30097,30098,30100],{"class":3427,"line":24},[1291,30099,3476],{"class":3475},[1291,30101,29855],{"class":3431},[1291,30103,30104],{"class":3427,"line":675},[1291,30105,3526],{"emptyLinePlaceholder":35},[1291,30107,30108,30110,30112,30114],{"class":3427,"line":3542},[1291,30109,9369],{"class":7739},[1291,30111,9372],{"class":7739},[1291,30113,29879],{"class":3812},[1291,30115,27496],{"class":3435},[1291,30117,30118],{"class":3427,"line":3547},[1291,30119,30120],{"class":3673},"    # Connect to the NATS server\n",[1291,30122,30123,30126,30128,30130,30132,30134,30136,30138,30140,30142,30144],{"class":3427,"line":3572},[1291,30124,30125],{"class":3431},"    nc ",[1291,30127,3738],{"class":3435},[1291,30129,9511],{"class":3475},[1291,30131,29916],{"class":3431},[1291,30133,694],{"class":3435},[1291,30135,29921],{"class":3812},[1291,30137,3816],{"class":3435},[1291,30139,3691],{"class":3435},[1291,30141,29928],{"class":3439},[1291,30143,3691],{"class":3435},[1291,30145,3827],{"class":3435},[1291,30147,30148],{"class":3427,"line":3614},[1291,30149,30150],{"class":3431},"    \n",[1291,30152,30153],{"class":3427,"line":3640},[1291,30154,30155],{"class":3673},"    # Publish a message to the 'updates' subject\n",[1291,30157,30158,30161,30163,30165,30167,30169,30171,30173,30175,30177,30179,30181,30183,30185],{"class":3427,"line":3665},[1291,30159,30160],{"class":3475},"    
await",[1291,30162,29968],{"class":3431},[1291,30164,694],{"class":3435},[1291,30166,29973],{"class":3812},[1291,30168,3816],{"class":3435},[1291,30170,3691],{"class":3435},[1291,30172,29952],{"class":3439},[1291,30174,3691],{"class":3435},[1291,30176,3566],{"class":3435},[1291,30178,29986],{"class":7739},[1291,30180,3691],{"class":3435},[1291,30182,29991],{"class":3439},[1291,30184,3691],{"class":3435},[1291,30186,3827],{"class":3435},[1291,30188,30189,30191,30193,30195,30197,30199],{"class":3427,"line":3670},[1291,30190,27099],{"class":3812},[1291,30192,3816],{"class":3435},[1291,30194,3691],{"class":3435},[1291,30196,30006],{"class":3439},[1291,30198,3691],{"class":3435},[1291,30200,3827],{"class":3435},[1291,30202,30203],{"class":3427,"line":3677},[1291,30204,30150],{"class":3431},[1291,30206,30207],{"class":3427,"line":3877},[1291,30208,30209],{"class":3673},"    # Close the connection\n",[1291,30211,30212,30214,30216,30218,30220],{"class":3427,"line":3916},[1291,30213,30160],{"class":3475},[1291,30215,29968],{"class":3431},[1291,30217,694],{"class":3435},[1291,30219,30033],{"class":3812},[1291,30221,4871],{"class":3435},[1291,30223,30224],{"class":3427,"line":4519},[1291,30225,3526],{"emptyLinePlaceholder":35},[1291,30227,30228],{"class":3427,"line":6038},[1291,30229,30230],{"class":3673},"# Run the asynchronous function\n",[1291,30232,30233,30235,30237,30239,30241,30243],{"class":3427,"line":6043},[1291,30234,29831],{"class":3431},[1291,30236,694],{"class":3435},[1291,30238,11274],{"class":3812},[1291,30240,3816],{"class":3435},[1291,30242,30067],{"class":3812},[1291,30244,6237],{"class":3435},[3189,30246,30248],{"id":30247},"creating-a-subscriber","Creating a Subscriber",[73,30250,30251],{},"After writing code for the publisher, you now need to write a subscriber that listens for messages on the same subject and handles them as they arrive.",[73,30253,30254],{},"Follow the below instructions to create a subscriber 
script:",[665,30256,30257],{},[148,30258,30259,4390],{},[169,30260,29825],{},[3418,30262,30264],{"className":3420,"code":30263,"language":3422,"meta":23,"style":23},"import asyncio \nimport nats\nimport argparse\n",[3061,30265,30266,30273,30279],{"__ignoreMap":23},[1291,30267,30268,30270],{"class":3427,"line":3428},[1291,30269,3476],{"class":3475},[1291,30271,30272],{"class":3431}," asyncio \n",[1291,30274,30275,30277],{"class":3427,"line":24},[1291,30276,3476],{"class":3475},[1291,30278,29855],{"class":3431},[1291,30280,30281,30283],{"class":3427,"line":675},[1291,30282,3476],{"class":3475},[1291,30284,30285],{"class":3431}," argparse\n",[665,30287,30288],{"start":24},[148,30289,30290,4390],{},[169,30291,30292],{},"Define an asynchronous function to subscribe to messages",[3418,30294,30296],{"className":3420,"code":30295,"language":3422,"meta":23,"style":23},"async def subscribe_messages(subject):\n",[3061,30297,30298],{"__ignoreMap":23},[1291,30299,30300,30302,30304,30307,30309,30311],{"class":3427,"line":3428},[1291,30301,9369],{"class":7739},[1291,30303,9372],{"class":7739},[1291,30305,30306],{"class":3812}," 
subscribe_messages",[1291,30308,3816],{"class":3435},[1291,30310,29675],{"class":3819},[1291,30312,11948],{"class":3435},[665,30314,30315],{"start":675},[148,30316,30317,4390],{},[169,30318,29888],{},[3418,30320,30321],{"className":3420,"code":29902,"language":3422,"meta":23,"style":23},[3061,30322,30323],{"__ignoreMap":23},[1291,30324,30325,30327,30329,30331,30333,30335,30337,30339,30341,30343,30345],{"class":3427,"line":3428},[1291,30326,29909],{"class":3431},[1291,30328,3738],{"class":3435},[1291,30330,9511],{"class":3475},[1291,30332,29916],{"class":3431},[1291,30334,694],{"class":3435},[1291,30336,29921],{"class":3812},[1291,30338,3816],{"class":3435},[1291,30340,3691],{"class":3435},[1291,30342,29928],{"class":3439},[1291,30344,3691],{"class":3435},[1291,30346,3827],{"class":3435},[665,30348,30349],{"start":3542},[148,30350,30351,4390],{},[169,30352,30353],{},"Define a message handler function",[3418,30355,30357],{"className":3420,"code":30356,"language":3422,"meta":23,"style":23},"async def message_handler(msg):\n    print(f\"Received a message on '{msg.subject}': {msg.data.decode()}\")\n",[3061,30358,30359,30375],{"__ignoreMap":23},[1291,30360,30361,30363,30365,30368,30370,30373],{"class":3427,"line":3428},[1291,30362,9369],{"class":7739},[1291,30364,9372],{"class":7739},[1291,30366,30367],{"class":3812}," message_handler",[1291,30369,3816],{"class":3435},[1291,30371,30372],{"class":3819},"msg",[1291,30374,11948],{"class":3435},[1291,30376,30377,30379,30381,30383,30386,30388,30390,30392,30394,30396,30399,30401,30403,30405,30407,30409,30412,30414,30416,30418],{"class":3427,"line":24},[1291,30378,27099],{"class":3812},[1291,30380,3816],{"class":3435},[1291,30382,9643],{"class":7739},[1291,30384,30385],{"class":3439},"\"Received a message on '",[1291,30387,8770],{"class":3451},[1291,30389,30372],{"class":3812},[1291,30391,694],{"class":3435},[1291,30393,29675],{"class":3457},[1291,30395,9671],{"class":3451},[1291,30397,30398],{"class":3439},"': 
",[1291,30400,8770],{"class":3451},[1291,30402,30372],{"class":3812},[1291,30404,694],{"class":3435},[1291,30406,3935],{"class":3457},[1291,30408,694],{"class":3435},[1291,30410,30411],{"class":3812},"decode",[1291,30413,12394],{"class":3435},[1291,30415,9671],{"class":3451},[1291,30417,3691],{"class":3439},[1291,30419,3827],{"class":3435},[73,30421,30422,30423,30426,30427,30430],{},"This function is called whenever a message is received on the subscribed subject. ",[3061,30424,30425],{},"msg.subject"," contains the subject of the message. ",[3061,30428,30429],{},"msg.data"," contains the message data in bytes, so you decode it to a string.",[665,30432,30433],{"start":3547},[148,30434,30435,4390],{},[169,30436,30437],{},"Subscribe to the subject passed as a parameter",[3418,30439,30441],{"className":3420,"code":30440,"language":3422,"meta":23,"style":23},"await nc.subscribe(subject, cb=message_handler)\nprint(f\"Subscribed to '{subject}' subject.\")\n",[3061,30442,30443,30470],{"__ignoreMap":23},[1291,30444,30445,30447,30449,30451,30454,30456,30458,30460,30463,30465,30468],{"class":3427,"line":3428},[1291,30446,9782],{"class":3475},[1291,30448,29968],{"class":3431},[1291,30450,694],{"class":3435},[1291,30452,30453],{"class":3812},"subscribe",[1291,30455,3816],{"class":3435},[1291,30457,29675],{"class":3812},[1291,30459,3566],{"class":3435},[1291,30461,30462],{"class":3819}," cb",[1291,30464,3738],{"class":3435},[1291,30466,30467],{"class":3812},"message_handler",[1291,30469,3827],{"class":3435},[1291,30471,30472,30474,30476,30478,30481,30483,30485,30487,30490],{"class":3427,"line":24},[1291,30473,4986],{"class":3812},[1291,30475,3816],{"class":3435},[1291,30477,9643],{"class":7739},[1291,30479,30480],{"class":3439},"\"Subscribed to '",[1291,30482,8770],{"class":3451},[1291,30484,29675],{"class":3812},[1291,30486,9671],{"class":3451},[1291,30488,30489],{"class":3439},"' 
subject.\"",[1291,30491,3827],{"class":3435},[73,30493,30494,30497,30498,30500],{},[3061,30495,30496],{},"nc.subscribe()"," subscribes to the specified subject and assigns the message handler. The callback ",[3061,30499,30467],{}," is called whenever a message is received.",[665,30502,30503],{"start":3572},[148,30504,30505,4390],{},[169,30506,30507],{},"Keep the subscriber running indefinitely",[3418,30509,30511],{"className":3420,"code":30510,"language":3422,"meta":23,"style":23},"while True: \n    await asyncio.sleep(1)\n",[3061,30512,30513,30523],{"__ignoreMap":23},[1291,30514,30515,30518,30521],{"class":3427,"line":3428},[1291,30516,30517],{"class":3475},"while",[1291,30519,30520],{"class":3435}," True:",[1291,30522,7743],{"class":3431},[1291,30524,30525,30527,30530,30532,30535,30537,30539],{"class":3427,"line":24},[1291,30526,30160],{"class":3475},[1291,30528,30529],{"class":3431}," asyncio",[1291,30531,694],{"class":3435},[1291,30533,30534],{"class":3812},"sleep",[1291,30536,3816],{"class":3435},[1291,30538,24626],{"class":3451},[1291,30540,3827],{"class":3435},[73,30542,30543],{},"This infinite loop ensures that the subscriber keeps running to listen for incoming messages.",[665,30545,30546],{"start":3614},[148,30547,30548,4390],{},[169,30549,30550],{},"Set Up Argument Parser",[3418,30552,30554],{"className":3420,"code":30553,"language":3422,"meta":23,"style":23},"if __name__ == \"__main__\":\n    # Set up argument parser\n    parser = argparse.ArgumentParser(description='NATS Subscriber')\n    parser.add_argument('--subject', type=str, required=True, help='NATS subject to subscribe to')\n    args = parser.parse_args()\n\n    subject = args.subject\n",[3061,30555,30556,30574,30579,30610,30658,30674,30678],{"__ignoreMap":23},[1291,30557,30558,30560,30563,30565,30567,30570,30572],{"class":3427,"line":3428},[1291,30559,5223],{"class":3475},[1291,30561,30562],{"class":3431}," __name__ 
",[1291,30564,3448],{"class":3435},[1291,30566,3705],{"class":3435},[1291,30568,30569],{"class":3439},"__main__",[1291,30571,3691],{"class":3435},[1291,30573,5243],{"class":3435},[1291,30575,30576],{"class":3427,"line":24},[1291,30577,30578],{"class":3673},"    # Set up argument parser\n",[1291,30580,30581,30584,30586,30589,30591,30594,30596,30599,30601,30603,30606,30608],{"class":3427,"line":675},[1291,30582,30583],{"class":3431},"    parser ",[1291,30585,3738],{"class":3435},[1291,30587,30588],{"class":3431}," argparse",[1291,30590,694],{"class":3435},[1291,30592,30593],{"class":3812},"ArgumentParser",[1291,30595,3816],{"class":3435},[1291,30597,30598],{"class":3819},"description",[1291,30600,3738],{"class":3435},[1291,30602,3436],{"class":3435},[1291,30604,30605],{"class":3439},"NATS Subscriber",[1291,30607,3436],{"class":3435},[1291,30609,3827],{"class":3435},[1291,30611,30612,30614,30616,30619,30621,30623,30626,30628,30630,30633,30635,30637,30639,30642,30644,30647,30649,30651,30654,30656],{"class":3427,"line":3542},[1291,30613,4616],{"class":3431},[1291,30615,694],{"class":3435},[1291,30617,30618],{"class":3812},"add_argument",[1291,30620,3816],{"class":3435},[1291,30622,3436],{"class":3435},[1291,30624,30625],{"class":3439},"--subject",[1291,30627,3436],{"class":3435},[1291,30629,3566],{"class":3435},[1291,30631,30632],{"class":3819}," type",[1291,30634,3738],{"class":3435},[1291,30636,7171],{"class":6356},[1291,30638,3566],{"class":3435},[1291,30640,30641],{"class":3819}," required",[1291,30643,5605],{"class":3435},[1291,30645,30646],{"class":3819}," help",[1291,30648,3738],{"class":3435},[1291,30650,3436],{"class":3435},[1291,30652,30653],{"class":3439},"NATS subject to subscribe to",[1291,30655,3436],{"class":3435},[1291,30657,3827],{"class":3435},[1291,30659,30660,30663,30665,30667,30669,30672],{"class":3427,"line":3547},[1291,30661,30662],{"class":3431},"    args 
",[1291,30664,3738],{"class":3435},[1291,30666,9401],{"class":3431},[1291,30668,694],{"class":3435},[1291,30670,30671],{"class":3812},"parse_args",[1291,30673,4871],{"class":3435},[1291,30675,30676],{"class":3427,"line":3572},[1291,30677,3526],{"emptyLinePlaceholder":35},[1291,30679,30680,30683,30685,30688,30690],{"class":3427,"line":3614},[1291,30681,30682],{"class":3431},"    subject ",[1291,30684,3738],{"class":3435},[1291,30686,30687],{"class":3431}," args",[1291,30689,694],{"class":3435},[1291,30691,30692],{"class":3457},"subject\n",[73,30694,30695,30696,30698],{},"Define a ",[3061,30697,30625],{}," flag that must be provided while running the script.",[665,30700,30701],{"start":3640},[148,30702,30703,4390],{},[169,30704,30049],{},[3418,30706,30708],{"className":3420,"code":30707,"language":3422,"meta":23,"style":23},"asyncio.run(subscribe_messages(subject))\n",[3061,30709,30710],{"__ignoreMap":23},[1291,30711,30712,30714,30716,30718,30720,30723,30725,30727],{"class":3427,"line":3428},[1291,30713,29831],{"class":3431},[1291,30715,694],{"class":3435},[1291,30717,11274],{"class":3812},[1291,30719,3816],{"class":3435},[1291,30721,30722],{"class":3812},"subscribe_messages",[1291,30724,3816],{"class":3435},[1291,30726,29675],{"class":3812},[1291,30728,7178],{"class":3435},[3189,30730,30732,30733,30084],{"id":30731},"complete-subscriber-code-subscriberpy","Complete Subscriber Code (",[3061,30734,30735],{},"subscriber.py",[3418,30737,30739],{"className":3420,"code":30738,"filename":30735,"language":3422,"meta":23,"style":23},"import asyncio\nimport nats\nimport argparse\n\nasync def subscribe_messages(subject):\n    # Connect to the NATS server\n    nc = await nats.connect(\"nats:\u002F\u002Flocalhost:4222\")\n\n    # Define a message handler to process incoming messages\n    async def message_handler(msg):\n        print(f\"Received a message on '{msg.subject}': {msg.data.decode()}\")\n\n    # Subscribe to the subject provided\n    await nc.subscribe(subject, 
cb=message_handler)\n    print(f\"Subscribed to '{subject}' subject.\")\n\n    # Keep the subscriber running indefinitely\n    while True:\n        await asyncio.sleep(1)\n\nif __name__ == \"__main__\":\n    # Set up argument parser\n    parser = argparse.ArgumentParser(description='NATS Subscriber')\n    parser.add_argument('--subject', type=str, required=True, help='NATS subject to subscribe to')\n    args = parser.parse_args()\n\n    subject = args.subject\n\n    # Run the asynchronous function\n    asyncio.run(subscribe_messages(subject))\n",[3061,30740,30741,30747,30753,30759,30763,30777,30781,30805,30809,30814,30829,30872,30876,30881,30905,30925,30929,30934,30942,30959,30963,30979,30983,31009,31051,31065,31069,31081,31085,31090],{"__ignoreMap":23},[1291,30742,30743,30745],{"class":3427,"line":3428},[1291,30744,3476],{"class":3475},[1291,30746,29848],{"class":3431},[1291,30748,30749,30751],{"class":3427,"line":24},[1291,30750,3476],{"class":3475},[1291,30752,29855],{"class":3431},[1291,30754,30755,30757],{"class":3427,"line":675},[1291,30756,3476],{"class":3475},[1291,30758,30285],{"class":3431},[1291,30760,30761],{"class":3427,"line":3542},[1291,30762,3526],{"emptyLinePlaceholder":35},[1291,30764,30765,30767,30769,30771,30773,30775],{"class":3427,"line":3547},[1291,30766,9369],{"class":7739},[1291,30768,9372],{"class":7739},[1291,30770,30306],{"class":3812},[1291,30772,3816],{"class":3435},[1291,30774,29675],{"class":3819},[1291,30776,11948],{"class":3435},[1291,30778,30779],{"class":3427,"line":3572},[1291,30780,30120],{"class":3673},[1291,30782,30783,30785,30787,30789,30791,30793,30795,30797,30799,30801,30803],{"class":3427,"line":3614},[1291,30784,30125],{"class":3431},[1291,30786,3738],{"class":3435},[1291,30788,9511],{"class":3475},[1291,30790,29916],{"class":3431},[1291,30792,694],{"class":3435},[1291,30794,29921],{"class":3812},[1291,30796,3816],{"class":3435},[1291,30798,3691],{"class":3435},[1291,30800,29928],{"class":3439},[1291,30802,3691],{"class":
3435},[1291,30804,3827],{"class":3435},[1291,30806,30807],{"class":3427,"line":3640},[1291,30808,3526],{"emptyLinePlaceholder":35},[1291,30810,30811],{"class":3427,"line":3665},[1291,30812,30813],{"class":3673},"    # Define a message handler to process incoming messages\n",[1291,30815,30816,30819,30821,30823,30825,30827],{"class":3427,"line":3670},[1291,30817,30818],{"class":7739},"    async",[1291,30820,9372],{"class":7739},[1291,30822,30367],{"class":3812},[1291,30824,3816],{"class":3435},[1291,30826,30372],{"class":3819},[1291,30828,11948],{"class":3435},[1291,30830,30831,30834,30836,30838,30840,30842,30844,30846,30848,30850,30852,30854,30856,30858,30860,30862,30864,30866,30868,30870],{"class":3427,"line":3677},[1291,30832,30833],{"class":3812},"        print",[1291,30835,3816],{"class":3435},[1291,30837,9643],{"class":7739},[1291,30839,30385],{"class":3439},[1291,30841,8770],{"class":3451},[1291,30843,30372],{"class":3812},[1291,30845,694],{"class":3435},[1291,30847,29675],{"class":3457},[1291,30849,9671],{"class":3451},[1291,30851,30398],{"class":3439},[1291,30853,8770],{"class":3451},[1291,30855,30372],{"class":3812},[1291,30857,694],{"class":3435},[1291,30859,3935],{"class":3457},[1291,30861,694],{"class":3435},[1291,30863,30411],{"class":3812},[1291,30865,12394],{"class":3435},[1291,30867,9671],{"class":3451},[1291,30869,3691],{"class":3439},[1291,30871,3827],{"class":3435},[1291,30873,30874],{"class":3427,"line":3877},[1291,30875,3526],{"emptyLinePlaceholder":35},[1291,30877,30878],{"class":3427,"line":3916},[1291,30879,30880],{"class":3673},"    # Subscribe to the subject 
provided\n",[1291,30882,30883,30885,30887,30889,30891,30893,30895,30897,30899,30901,30903],{"class":3427,"line":4519},[1291,30884,30160],{"class":3475},[1291,30886,29968],{"class":3431},[1291,30888,694],{"class":3435},[1291,30890,30453],{"class":3812},[1291,30892,3816],{"class":3435},[1291,30894,29675],{"class":3812},[1291,30896,3566],{"class":3435},[1291,30898,30462],{"class":3819},[1291,30900,3738],{"class":3435},[1291,30902,30467],{"class":3812},[1291,30904,3827],{"class":3435},[1291,30906,30907,30909,30911,30913,30915,30917,30919,30921,30923],{"class":3427,"line":6038},[1291,30908,27099],{"class":3812},[1291,30910,3816],{"class":3435},[1291,30912,9643],{"class":7739},[1291,30914,30480],{"class":3439},[1291,30916,8770],{"class":3451},[1291,30918,29675],{"class":3812},[1291,30920,9671],{"class":3451},[1291,30922,30489],{"class":3439},[1291,30924,3827],{"class":3435},[1291,30926,30927],{"class":3427,"line":6043},[1291,30928,3526],{"emptyLinePlaceholder":35},[1291,30930,30931],{"class":3427,"line":6066},[1291,30932,30933],{"class":3673},"    # Keep the subscriber running indefinitely\n",[1291,30935,30936,30939],{"class":3427,"line":6078},[1291,30937,30938],{"class":3475},"    while",[1291,30940,30941],{"class":3435}," True:\n",[1291,30943,30944,30947,30949,30951,30953,30955,30957],{"class":3427,"line":6089},[1291,30945,30946],{"class":3475},"        
await",[1291,30948,30529],{"class":3431},[1291,30950,694],{"class":3435},[1291,30952,30534],{"class":3812},[1291,30954,3816],{"class":3435},[1291,30956,24626],{"class":3451},[1291,30958,3827],{"class":3435},[1291,30960,30961],{"class":3427,"line":6124},[1291,30962,3526],{"emptyLinePlaceholder":35},[1291,30964,30965,30967,30969,30971,30973,30975,30977],{"class":3427,"line":6133},[1291,30966,5223],{"class":3475},[1291,30968,30562],{"class":3431},[1291,30970,3448],{"class":3435},[1291,30972,3705],{"class":3435},[1291,30974,30569],{"class":3439},[1291,30976,3691],{"class":3435},[1291,30978,5243],{"class":3435},[1291,30980,30981],{"class":3427,"line":6141},[1291,30982,30578],{"class":3673},[1291,30984,30985,30987,30989,30991,30993,30995,30997,30999,31001,31003,31005,31007],{"class":3427,"line":6151},[1291,30986,30583],{"class":3431},[1291,30988,3738],{"class":3435},[1291,30990,30588],{"class":3431},[1291,30992,694],{"class":3435},[1291,30994,30593],{"class":3812},[1291,30996,3816],{"class":3435},[1291,30998,30598],{"class":3819},[1291,31000,3738],{"class":3435},[1291,31002,3436],{"class":3435},[1291,31004,30605],{"class":3439},[1291,31006,3436],{"class":3435},[1291,31008,3827],{"class":3435},[1291,31010,31011,31013,31015,31017,31019,31021,31023,31025,31027,31029,31031,31033,31035,31037,31039,31041,31043,31045,31047,31049],{"class":3427,"line":6923},[1291,31012,4616],{"class":3431},[1291,31014,694],{"class":3435},[1291,31016,30618],{"class":3812},[1291,31018,3816],{"class":3435},[1291,31020,3436],{"class":3435},[1291,31022,30625],{"class":3439},[1291,31024,3436],{"class":3435},[1291,31026,3566],{"class":3435},[1291,31028,30632],{"class":3819},[1291,31030,3738],{"class":3435},[1291,31032,7171],{"class":6356},[1291,31034,3566],{"class":3435},[1291,31036,30641],{"class":3819},[1291,31038,5605],{"class":3435},[1291,31040,30646],{"class":3819},[1291,31042,3738],{"class":3435},[1291,31044,3436],{"class":3435},[1291,31046,30653],{"class":3439},[1291,31048,3436],{"class":3435},[1
291,31050,3827],{"class":3435},[1291,31052,31053,31055,31057,31059,31061,31063],{"class":3427,"line":6928},[1291,31054,30662],{"class":3431},[1291,31056,3738],{"class":3435},[1291,31058,9401],{"class":3431},[1291,31060,694],{"class":3435},[1291,31062,30671],{"class":3812},[1291,31064,4871],{"class":3435},[1291,31066,31067],{"class":3427,"line":6934},[1291,31068,3526],{"emptyLinePlaceholder":35},[1291,31070,31071,31073,31075,31077,31079],{"class":3427,"line":6940},[1291,31072,30682],{"class":3431},[1291,31074,3738],{"class":3435},[1291,31076,30687],{"class":3431},[1291,31078,694],{"class":3435},[1291,31080,30692],{"class":3457},[1291,31082,31083],{"class":3427,"line":6952},[1291,31084,3526],{"emptyLinePlaceholder":35},[1291,31086,31087],{"class":3427,"line":6984},[1291,31088,31089],{"class":3673},"    # Run the asynchronous function\n",[1291,31091,31092,31095,31097,31099,31101,31103,31105,31107],{"class":3427,"line":7996},[1291,31093,31094],{"class":3431},"    asyncio",[1291,31096,694],{"class":3435},[1291,31098,11274],{"class":3812},[1291,31100,3816],{"class":3435},[1291,31102,30722],{"class":3812},[1291,31104,3816],{"class":3435},[1291,31106,29675],{"class":3812},[1291,31108,7178],{"class":3435},[140,31110,31112],{"id":31111},"testing-the-setup","Testing the Setup",[73,31114,31115,31116,31121],{},"It's crucial to run the subscriber ",[169,31117,31118],{},[15804,31119,31120],{},"before"," the publisher. NATS does not buffer messages by default; it delivers messages to subscribers who are actively listening at the time of publishing. 
If you run the publisher first, the message won’t be sent anywhere because no subscribers are listening yet.",[73,31123,31124],{},"With that in mind, you can run the pipeline in two simple steps:",[665,31126,31127],{},[148,31128,31129],{},"Run the subscriber script first:",[3418,31131,31133],{"className":6347,"code":31132,"language":6349,"meta":23,"style":23},"python subscriber.py --subject updates\n",[3061,31134,31135],{"__ignoreMap":23},[1291,31136,31137,31139,31142,31145],{"class":3427,"line":3428},[1291,31138,3422],{"class":6356},[1291,31140,31141],{"class":3439}," subscriber.py",[1291,31143,31144],{"class":3439}," --subject",[1291,31146,31147],{"class":3439}," updates\n",[665,31149,31150],{"start":24},[148,31151,31152],{},"Then, in another terminal, you can run the publisher script:",[3418,31154,31156],{"className":6347,"code":31155,"language":6349,"meta":23,"style":23},"python publisher.py\n",[3061,31157,31158],{"__ignoreMap":23},[1291,31159,31160,31162],{"class":3427,"line":3428},[1291,31161,3422],{"class":6356},[1291,31163,31164],{"class":3439}," publisher.py\n",[73,31166,31167],{},"Expected Output on Subscriber Terminal:",[3418,31169,31171],{"className":6347,"code":31170,"language":6349,"meta":23,"style":23},"Subscribed to 'updates' subject.\nReceived a message on 'updates': Hello, NATS!\n",[3061,31172,31173,31189],{"__ignoreMap":23},[1291,31174,31175,31178,31180,31182,31184,31186],{"class":3427,"line":3428},[1291,31176,31177],{"class":6356},"Subscribed",[1291,31179,27180],{"class":3439},[1291,31181,6415],{"class":3435},[1291,31183,29952],{"class":3439},[1291,31185,3436],{"class":3435},[1291,31187,31188],{"class":3439}," subject.\n",[1291,31190,31191,31194,31197,31200,31202,31204,31206,31208,31210,31213],{"class":3427,"line":24},[1291,31192,31193],{"class":6356},"Received",[1291,31195,31196],{"class":3439}," a",[1291,31198,31199],{"class":3439}," 
message",[1291,31201,17544],{"class":3439},[1291,31203,6415],{"class":3435},[1291,31205,29952],{"class":3439},[1291,31207,3436],{"class":3435},[1291,31209,4390],{"class":3439},[1291,31211,31212],{"class":3439}," Hello,",[1291,31214,31215],{"class":3439}," NATS!\n",[73,31217,31218,31219,31221],{},"The subscriber listens to the ",[3061,31220,29952],{}," subject.\nWhen the publisher sends a message, the subscriber immediately receives and prints it.",[140,31223,31225],{"id":31224},"getting-started-with-pathway-live-data-framework","Getting Started with Pathway Live Data Framework",[3189,31227,31229],{"id":31228},"installing-the-pathway-live-data-framework","Installing the Pathway Live Data Framework",[73,31231,31232,31233,31235],{},"Install the framework using ",[3061,31234,6357],{}," by running the following command:",[3418,31237,31239],{"className":6347,"code":31238,"language":6349,"meta":23,"style":23},"pip install pathway\n",[3061,31240,31241],{"__ignoreMap":23},[1291,31242,31243,31245,31247],{"class":3427,"line":3428},[1291,31244,6357],{"class":6356},[1291,31246,6360],{"class":3439},[1291,31248,6363],{"class":3439},[3189,31250,31252],{"id":31251},"connecting-pathway-live-data-framework-with-nats","Connecting Pathway Live Data Framework with NATS",[73,31254,31255],{},"The Pathway Live Data Framework has recently introduced connectors for NATS, enabling seamless integration between the two systems. This allows for efficient ingestion and processing of real-time data streams from NATS within the framework.",[3206,31257,31259],{"id":31258},"supported-nats-parameters-and-formats","Supported NATS Parameters and Formats:",[73,31261,31262],{},"When using the framework's NATS connectors, you can specify various parameters. 
Here are the parameters used in the code below:",[145,31264,31265,31274,31279],{},[148,31266,31267,31270,31271,19244],{},[15804,31268,31269],{},"uri",": The URI of the NATS server (e.g., ",[3061,31272,31273],{},"\"nats:\u002F\u002F127.0.0.1:4222\"",[148,31275,31276,31278],{},[15804,31277,29675],{},": The NATS subject to read messages from.",[148,31280,31281,31284,31285],{},[15804,31282,31283],{},"format",": The format of the messages. Supported formats include:\n",[145,31286,31287,31293,31299],{},[148,31288,31289,31292],{},[3061,31290,31291],{},"\"plaintext\"",": Messages are expected to be plain text strings and decoded from UTF-8. Useful for simple string messages.",[148,31294,31295,31298],{},[3061,31296,31297],{},"\"json\"",": Messages are JSON-formatted strings, which will be parsed into structured data. Requires specifying a schema.",[148,31300,31301,31304],{},[3061,31302,31303],{},"\"raw\"",": Messages are treated as raw bytes without any decoding.",[3206,31306,31308],{"id":31307},"additional-parameters","Additional Parameters:",[145,31310,31311,31320,31326],{},[148,31312,31313,31315,31316,31319],{},[15804,31314,15987],{},": Used only when ",[3061,31317,31318],{},"format=\"json\"",". Defines the structure of the data to map JSON fields to table columns.",[148,31321,31322,31325],{},[15804,31323,31324],{},"autocommit_duration_ms",": The maximum time between two commits. 
Every autocommit_duration_ms milliseconds, the updates received by the connector are committed and pushed into Pathway's dataflow.",[148,31327,31328,31331,31332,694],{},[15804,31329,31330],{},"json_field_paths",": Allows mapping field names to paths within the JSON structure when using ",[3061,31333,31318],{},[73,31335,31336,31337],{},"For a detailed list of all the supported parameters and examples, you can head to the Pathway documentation on NATS connectors: ",[77,31338,31339],{"href":31339,"rel":31340},"https:\u002F\u002Fpathway.com\u002Fdevelopers\u002Fapi-docs\u002Fpathway-io\u002Fnats",[81],[3189,31342,31344],{"id":31343},"readingwriting-nats-messages-with-pathway","Reading\u002FWriting NATS Messages with Pathway",[73,31346,31347],{},"Here's how you can read messages from a NATS subject, process them using Pathway, and write the processed messages back to another NATS subject:",[665,31349,31350],{},[148,31351,31352,4390],{},[169,31353,31354],{},"Import Pathway",[3418,31356,31358],{"className":3420,"code":31357,"language":3422,"meta":23,"style":23},"import pathway as pw\n",[3061,31359,31360],{"__ignoreMap":23},[1291,31361,31362,31364,31366,31368],{"class":3427,"line":3428},[1291,31363,3476],{"class":3475},[1291,31365,3533],{"class":3431},[1291,31367,3536],{"class":3475},[1291,31369,3539],{"class":3431},[665,31371,31372],{"start":24},[148,31373,31374,4390],{},[169,31375,31376],{},"Read messages from NATS using 'plaintext' format",[3418,31378,31380],{"className":3420,"code":31379,"language":3422,"meta":23,"style":23},"message_table = pw.io.nats.read(\n    uri=\"nats:\u002F\u002F127.0.0.1:4222\",\n    topic=\"updates\",\n    format=\"plaintext\"\n)\n",[3061,31381,31382,31405,31421,31436,31449],{"__ignoreMap":23},[1291,31383,31384,31387,31389,31391,31393,31395,31397,31399,31401,31403],{"class":3427,"line":3428},[1291,31385,31386],{"class":3431},"message_table 
",[1291,31388,3738],{"class":3435},[1291,31390,4073],{"class":3431},[1291,31392,694],{"class":3435},[1291,31394,4078],{"class":3457},[1291,31396,694],{"class":3435},[1291,31398,29835],{"class":3457},[1291,31400,694],{"class":3435},[1291,31402,4088],{"class":3812},[1291,31404,3874],{"class":3435},[1291,31406,31407,31410,31412,31414,31417,31419],{"class":3427,"line":24},[1291,31408,31409],{"class":3819},"    uri",[1291,31411,3738],{"class":3435},[1291,31413,3691],{"class":3435},[1291,31415,31416],{"class":3439},"nats:\u002F\u002F127.0.0.1:4222",[1291,31418,3691],{"class":3435},[1291,31420,4107],{"class":3435},[1291,31422,31423,31426,31428,31430,31432,31434],{"class":3427,"line":675},[1291,31424,31425],{"class":3819},"    topic",[1291,31427,3738],{"class":3435},[1291,31429,3691],{"class":3435},[1291,31431,29952],{"class":3439},[1291,31433,3691],{"class":3435},[1291,31435,4107],{"class":3435},[1291,31437,31438,31440,31442,31444,31447],{"class":3427,"line":3542},[1291,31439,4112],{"class":3819},[1291,31441,3738],{"class":3435},[1291,31443,3691],{"class":3435},[1291,31445,31446],{"class":3439},"plaintext",[1291,31448,3746],{"class":3435},[1291,31450,31451],{"class":3427,"line":3547},[1291,31452,3827],{"class":3435},[73,31454,31455,31456,31459],{},"Use ",[3061,31457,31458],{},"pw.io.nats.read()"," to subscribe to the updates subject. The format=\"plaintext\" indicates that messages are plain text and decoded from UTF-8. 
The messages are stored in the data column of message_table.",[665,31461,31462],{"start":675},[148,31463,31464,4390],{},[169,31465,31466],{},"Process the messages",[3418,31468,31470],{"className":3420,"code":31469,"language":3422,"meta":23,"style":23},"processed_messages = message_table.select(\n    message=pw.this.data\n)\n",[3061,31471,31472,31488,31506],{"__ignoreMap":23},[1291,31473,31474,31477,31479,31482,31484,31486],{"class":3427,"line":3428},[1291,31475,31476],{"class":3431},"processed_messages ",[1291,31478,3738],{"class":3435},[1291,31480,31481],{"class":3431}," message_table",[1291,31483,694],{"class":3435},[1291,31485,16571],{"class":3812},[1291,31487,3874],{"class":3435},[1291,31489,31490,31493,31495,31497,31499,31501,31503],{"class":3427,"line":24},[1291,31491,31492],{"class":3819},"    message",[1291,31494,3738],{"class":3435},[1291,31496,3841],{"class":3812},[1291,31498,694],{"class":3435},[1291,31500,16845],{"class":3457},[1291,31502,694],{"class":3435},[1291,31504,31505],{"class":3457},"data\n",[1291,31507,31508],{"class":3427,"line":675},[1291,31509,3827],{"class":3435},[73,31511,31512],{},"Create a new table processed_messages by selecting a new column message with the original message intact.",[665,31514,31515],{"start":3542},[148,31516,31517,4390],{},[169,31518,31519],{},"Output processed messages to another subject using 'plaintext' format",[3418,31521,31523],{"className":3420,"code":31522,"language":3422,"meta":23,"style":23},"pw.io.nats.write(\n    processed_messages,\n    uri=\"nats:\u002F\u002F127.0.0.1:4222\",\n    topic=\"processed.updates\",\n    format=\"plaintext\",\n    
value=processed_messages.message\n)\n",[3061,31524,31525,31543,31550,31564,31579,31593,31608],{"__ignoreMap":23},[1291,31526,31527,31529,31531,31533,31535,31537,31539,31541],{"class":3427,"line":3428},[1291,31528,3841],{"class":3431},[1291,31530,694],{"class":3435},[1291,31532,4078],{"class":3457},[1291,31534,694],{"class":3435},[1291,31536,29835],{"class":3457},[1291,31538,694],{"class":3435},[1291,31540,9700],{"class":3812},[1291,31542,3874],{"class":3435},[1291,31544,31545,31548],{"class":3427,"line":24},[1291,31546,31547],{"class":3812},"    processed_messages",[1291,31549,4107],{"class":3435},[1291,31551,31552,31554,31556,31558,31560,31562],{"class":3427,"line":675},[1291,31553,31409],{"class":3819},[1291,31555,3738],{"class":3435},[1291,31557,3691],{"class":3435},[1291,31559,31416],{"class":3439},[1291,31561,3691],{"class":3435},[1291,31563,4107],{"class":3435},[1291,31565,31566,31568,31570,31572,31575,31577],{"class":3427,"line":3542},[1291,31567,31425],{"class":3819},[1291,31569,3738],{"class":3435},[1291,31571,3691],{"class":3435},[1291,31573,31574],{"class":3439},"processed.updates",[1291,31576,3691],{"class":3435},[1291,31578,4107],{"class":3435},[1291,31580,31581,31583,31585,31587,31589,31591],{"class":3427,"line":3547},[1291,31582,4112],{"class":3819},[1291,31584,3738],{"class":3435},[1291,31586,3691],{"class":3435},[1291,31588,31446],{"class":3439},[1291,31590,3691],{"class":3435},[1291,31592,4107],{"class":3435},[1291,31594,31595,31598,31600,31603,31605],{"class":3427,"line":3572},[1291,31596,31597],{"class":3819},"    value",[1291,31599,3738],{"class":3435},[1291,31601,31602],{"class":3812},"processed_messages",[1291,31604,694],{"class":3435},[1291,31606,31607],{"class":3457},"message\n",[1291,31609,31610],{"class":3427,"line":3614},[1291,31611,3827],{"class":3435},[73,31613,31455,31614,31617],{},[3061,31615,31616],{},"pw.io.nats.write()"," to publish messages to the processed.updates subject. 
format=\"plaintext\" specifies that the messages are plain text. value=processed_messages.message indicates which column to use as the message payload.",[665,31619,31620],{"start":3547},[148,31621,31622,4390],{},[169,31623,31624],{},"Run the Pathway pipeline",[3418,31626,31628],{"className":3420,"code":31627,"language":3422,"meta":23,"style":23},"pw.run()\n",[3061,31629,31630],{"__ignoreMap":23},[1291,31631,31632,31634,31636,31638],{"class":3427,"line":3428},[1291,31633,3841],{"class":3431},[1291,31635,694],{"class":3435},[1291,31637,11274],{"class":3812},[1291,31639,4871],{"class":3435},[73,31641,31642],{},"This starts the Pathway computation graph.",[3189,31644,31646],{"id":31645},"complete-pathway-code-pathway_processorpy","Complete Pathway Code (pathway_processor.py):",[3418,31648,31651],{"className":3420,"code":31649,"filename":31650,"language":3422,"meta":23,"style":23},"import pathway as pw\n\n# Read messages from NATS\nmessage_table = pw.io.nats.read(\n    uri=\"nats:\u002F\u002F127.0.0.1:4222\",\n    topic=\"updates\",\n    format=\"plaintext\"\n)\n\n# Process the messages as you wish\nprocessed_messages = message_table.select(\n    message=pw.this.data\n)\n\n# Output processed messages to another subject\npw.io.nats.write(\n    processed_messages,\n    uri=\"nats:\u002F\u002F127.0.0.1:4222\",\n    topic=\"processed.updates\",\n    format=\"json\"\n)\n\npw.run()\n","pathway_processor.py",[3061,31652,31653,31663,31667,31672,31694,31708,31722,31734,31738,31742,31747,31761,31777,31781,31785,31790,31808,31814,31828,31842,31854,31858,31862],{"__ignoreMap":23},[1291,31654,31655,31657,31659,31661],{"class":3427,"line":3428},[1291,31656,3476],{"class":3475},[1291,31658,3533],{"class":3431},[1291,31660,3536],{"class":3475},[1291,31662,3539],{"class":3431},[1291,31664,31665],{"class":3427,"line":24},[1291,31666,3526],{"emptyLinePlaceholder":35},[1291,31668,31669],{"class":3427,"line":675},[1291,31670,31671],{"class":3673},"# Read messages from 
NATS\n",[1291,31673,31674,31676,31678,31680,31682,31684,31686,31688,31690,31692],{"class":3427,"line":3542},[1291,31675,31386],{"class":3431},[1291,31677,3738],{"class":3435},[1291,31679,4073],{"class":3431},[1291,31681,694],{"class":3435},[1291,31683,4078],{"class":3457},[1291,31685,694],{"class":3435},[1291,31687,29835],{"class":3457},[1291,31689,694],{"class":3435},[1291,31691,4088],{"class":3812},[1291,31693,3874],{"class":3435},[1291,31695,31696,31698,31700,31702,31704,31706],{"class":3427,"line":3547},[1291,31697,31409],{"class":3819},[1291,31699,3738],{"class":3435},[1291,31701,3691],{"class":3435},[1291,31703,31416],{"class":3439},[1291,31705,3691],{"class":3435},[1291,31707,4107],{"class":3435},[1291,31709,31710,31712,31714,31716,31718,31720],{"class":3427,"line":3572},[1291,31711,31425],{"class":3819},[1291,31713,3738],{"class":3435},[1291,31715,3691],{"class":3435},[1291,31717,29952],{"class":3439},[1291,31719,3691],{"class":3435},[1291,31721,4107],{"class":3435},[1291,31723,31724,31726,31728,31730,31732],{"class":3427,"line":3614},[1291,31725,4112],{"class":3819},[1291,31727,3738],{"class":3435},[1291,31729,3691],{"class":3435},[1291,31731,31446],{"class":3439},[1291,31733,3746],{"class":3435},[1291,31735,31736],{"class":3427,"line":3640},[1291,31737,3827],{"class":3435},[1291,31739,31740],{"class":3427,"line":3665},[1291,31741,3526],{"emptyLinePlaceholder":35},[1291,31743,31744],{"class":3427,"line":3670},[1291,31745,31746],{"class":3673},"# Process the messages as you 
wish\n",[1291,31748,31749,31751,31753,31755,31757,31759],{"class":3427,"line":3677},[1291,31750,31476],{"class":3431},[1291,31752,3738],{"class":3435},[1291,31754,31481],{"class":3431},[1291,31756,694],{"class":3435},[1291,31758,16571],{"class":3812},[1291,31760,3874],{"class":3435},[1291,31762,31763,31765,31767,31769,31771,31773,31775],{"class":3427,"line":3877},[1291,31764,31492],{"class":3819},[1291,31766,3738],{"class":3435},[1291,31768,3841],{"class":3812},[1291,31770,694],{"class":3435},[1291,31772,16845],{"class":3457},[1291,31774,694],{"class":3435},[1291,31776,31505],{"class":3457},[1291,31778,31779],{"class":3427,"line":3916},[1291,31780,3827],{"class":3435},[1291,31782,31783],{"class":3427,"line":4519},[1291,31784,3526],{"emptyLinePlaceholder":35},[1291,31786,31787],{"class":3427,"line":6038},[1291,31788,31789],{"class":3673},"# Output processed messages to another subject\n",[1291,31791,31792,31794,31796,31798,31800,31802,31804,31806],{"class":3427,"line":6043},[1291,31793,3841],{"class":3431},[1291,31795,694],{"class":3435},[1291,31797,4078],{"class":3457},[1291,31799,694],{"class":3435},[1291,31801,29835],{"class":3457},[1291,31803,694],{"class":3435},[1291,31805,9700],{"class":3812},[1291,31807,3874],{"class":3435},[1291,31809,31810,31812],{"class":3427,"line":6066},[1291,31811,31547],{"class":3812},[1291,31813,4107],{"class":3435},[1291,31815,31816,31818,31820,31822,31824,31826],{"class":3427,"line":6078},[1291,31817,31409],{"class":3819},[1291,31819,3738],{"class":3435},[1291,31821,3691],{"class":3435},[1291,31823,31416],{"class":3439},[1291,31825,3691],{"class":3435},[1291,31827,4107],{"class":3435},[1291,31829,31830,31832,31834,31836,31838,31840],{"class":3427,"line":6089},[1291,31831,31425],{"class":3819},[1291,31833,3738],{"class":3435},[1291,31835,3691],{"class":3435},[1291,31837,31574],{"class":3439},[1291,31839,3691],{"class":3435},[1291,31841,4107],{"class":3435},[1291,31843,31844,31846,31848,31850,31852],{"class":3427,"line":6124},[1291,318
45,4112],{"class":3819},[1291,31847,3738],{"class":3435},[1291,31849,3691],{"class":3435},[1291,31851,8623],{"class":3439},[1291,31853,3746],{"class":3435},[1291,31855,31856],{"class":3427,"line":6133},[1291,31857,3827],{"class":3435},[1291,31859,31860],{"class":3427,"line":6141},[1291,31861,3526],{"emptyLinePlaceholder":35},[1291,31863,31864,31866,31868,31870],{"class":3427,"line":6151},[1291,31865,3841],{"class":3431},[1291,31867,694],{"class":3435},[1291,31869,11274],{"class":3812},[1291,31871,4871],{"class":3435},[73,31873,31874,31875,31877],{},"To demonstrate the data flow, this time you have to run the subscriber with the ",[3061,31876,31574],{}," subject.",[3206,31879,31881],{"id":31880},"running-the-pathway-script","Running the Pathway Script",[665,31883,31884],{},[148,31885,31886],{},"Start the Pathway processor script that listens to the messages on the subject “updates” and forwards them to the subject “processed.updates”:",[3418,31888,31890],{"className":6347,"code":31889,"language":6349,"meta":23,"style":23},"python pathway_processor.py\n",[3061,31891,31892],{"__ignoreMap":23},[1291,31893,31894,31896],{"class":3427,"line":3428},[1291,31895,3422],{"class":6356},[1291,31897,31898],{"class":3439}," pathway_processor.py\n",[665,31900,31901],{"start":24},[148,31902,31903],{},"Run the subscriber script that listens to the messages produced by the Pathway script run in the previous step. 
Since it forwards messages to the subject “processed.updates”, this subject will be listened to by the script:",[3418,31905,31907],{"className":6347,"code":31906,"language":6349,"meta":23,"style":23},"python subscriber.py --subject processed.updates\n",[3061,31908,31909],{"__ignoreMap":23},[1291,31910,31911,31913,31915,31918],{"class":3427,"line":3428},[1291,31912,3422],{"class":6356},[1291,31914,31141],{"class":3439},[1291,31916,31917],{"class":3439}," --subject",[1291,31919,31920],{"class":3439}," processed.updates\n",[665,31922,31923],{"start":675},[148,31924,31925],{},"Run the publisher script to produce messages to the subject “updates”. These messages will be picked up by the Pathway script started in step 1 and then forwarded to the subject “processed.updates”, which the subscriber script from step 2 is listening to:",[3418,31927,31928],{"className":6347,"code":31155,"language":6349,"meta":23,"style":23},[3061,31929,31930],{"__ignoreMap":23},[1291,31931,31932,31934],{"class":3427,"line":3428},[1291,31933,3422],{"class":6356},[1291,31935,31164],{"class":3439},[73,31937,31938,4390],{},[169,31939,31940],{},"Expected Output on Subscriber Terminal",[3418,31942,31944],{"className":6347,"code":31943,"language":6349,"meta":23,"style":23},"Subscribed to 'processed.updates' subject.\nReceived a processed message on 'processed.updates': Hello, NATS!\n",[3061,31945,31946,31960],{"__ignoreMap":23},[1291,31947,31948,31950,31952,31954,31956,31958],{"class":3427,"line":3428},[1291,31949,31177],{"class":6356},[1291,31951,27180],{"class":3439},[1291,31953,6415],{"class":3435},[1291,31955,31574],{"class":3439},[1291,31957,3436],{"class":3435},[1291,31959,31188],{"class":3439},[1291,31961,31962,31964,31966,31969,31971,31973,31975,31977,31979,31981,31983],{"class":3427,"line":24},[1291,31963,31193],{"class":6356},[1291,31965,31196],{"class":3439},[1291,31967,31968],{"class":3439}," 
processed",[1291,31970,31199],{"class":3439},[1291,31972,17544],{"class":3439},[1291,31974,6415],{"class":3435},[1291,31976,31574],{"class":3439},[1291,31978,3436],{"class":3435},[1291,31980,4390],{"class":3439},[1291,31982,31212],{"class":3439},[1291,31984,31215],{"class":3439},[140,31986,31988],{"id":31987},"real-time-fleet-monitoring-use-case-with-kafka-and-flink-alternatives-nats-and-pathway-live-data-framework","Real Time Fleet Monitoring use case with Kafka and Flink Alternatives: NATS and Pathway Live Data Framework",[3189,31990,31992],{"id":31991},"problem-statement","Problem Statement",[73,31994,31995],{},"Suppose you work at a logistics company that operates a fleet of vehicles equipped with sensors that send telemetry data such as location, engine temperature, fuel level, and brake health. You need to monitor these vehicles in real-time to optimize routing and logistics. For instance, tracking the exact locations of the fleet makes it possible to make more efficient dispatching decisions.",[73,31997,31998],{},"In addition, it is crucial to detect critical issues like engine overheating or low fuel levels to prevent breakdowns and delays. By analyzing the incoming telemetry data, you aim to predict maintenance needs before failures occur, enhancing safety and operational efficiency.",[73,32000,32001],{},"The immediate alerts for any critical conditions are essential to maintain uninterrupted service and ensure the safety of drivers and cargo.",[3189,32003,32005],{"id":32004},"system-architecture","System Architecture",[73,32007,32008],{},"To effectively monitor this fleet, you come up with an architecture that integrates data collection, processing and alerting components. 
The description of each component can be found below.",[1141,32010],{":zoomable":1143,"alt":32011,"className":32012,"sizes":16088,"src":32013},"System architecture",[23361],"\u002Fassets\u002Fcontent\u002Fblog\u002Fsystem-architecture-1.png",[73,32015,32016],{},"The data flow in the system is illustrated by the following figure:",[1141,32018],{":zoomable":1143,"alt":32011,"className":32019,"sizes":16088,"src":32020},[23361],"\u002Fassets\u002Fcontent\u002Fblog\u002Fsystem-architecture-2.png",[3206,32022,32024],{"id":32023},"vehiclesiot-devices","Vehicles\u002FIoT Devices:",[73,32026,32027,32028,32031,32032,694],{},"The devices generate telemetry data, such as vehicle location, speed, temperature, and any other relevant metrics. This data is published to a specific subject on the ",[169,32029,32030],{},"NATS Server",", named ",[3061,32033,32034],{},"fleet.telemetry",[3206,32036,32038],{"id":32037},"nats-server","NATS Server:",[73,32040,32041],{},"The NATS Server acts as a message broker facilitating communication between various system components. Two different subjects will be used in the NATS server here: one to send the telemetry data and one to send alerts.",[145,32043,32044,32049],{},[148,32045,32046,32048],{},[3061,32047,32034],{},": Vehicles\u002FIoT devices send their telemetry data to this subject. The Pathway Python Script subscribes to this subject to process the incoming telemetry data.",[148,32050,32051,32054],{},[3061,32052,32053],{},"fleet.alerts",": After processing, if any conditions or anomalies are detected, the Pathway Python Script publishes alerts to this subject.",[3206,32056,32058],{"id":32057},"pathway-live-data-framework-anomaly-detection-script","Pathway Live Data Framework Anomaly Detection Script:",[73,32060,32061,32062,32064,32065,32067],{},"This script is responsible for processing the telemetry data it receives from ",[3061,32063,32034],{},". 
It might include logic for detecting anomalies, such as speeding, out-of-bounds location, or malfunction alerts. If an anomaly is detected, the script publishes an alert to the ",[3061,32066,32053],{}," subject on the NATS server.",[3206,32069,32071],{"id":32070},"alert-subscriber","Alert Subscriber:",[73,32073,32074,32075,32077],{},"This component subscribes to the ",[3061,32076,32053],{}," subject to receive alerts published by the Pathway Python Script in JSON format.",[3206,32079,32081],{"id":32080},"alerting-system","Alerting System:",[73,32083,32084],{},"The Alert Subscriber pushes these alerts to the Alerting System. The Alerting System can be responsible for:",[145,32086,32087,32090],{},[148,32088,32089],{},"Delivering the alerts to stakeholders, such as sending notifications via email, SMS, or dashboard updates.",[148,32091,32092],{},"Triggering further workflows or escalations based on the nature of the alerts.",[73,32094,32095],{},"Since the primary focus of this tutorial is data processing and anomaly detection using Pathway, the alerting system itself is omitted. 
Alerts will be printed on the terminal, which can later be pushed to any alerting system of your choice.",[3189,32097,32099],{"id":32098},"simulating-telemetry-data","Simulating Telemetry Data",[73,32101,32102],{},"Since there is no real source of signals in this tutorial, you need to have a simulator that creates and publishes random data to a NATS subject.",[73,32104,32105],{},"Below are several steps you need to take in order to create this simulator:",[665,32107,32108],{},[148,32109,32110,4390],{},[169,32111,32112],{},"Import necessary modules",[3418,32114,32116],{"className":3420,"code":32115,"language":3422,"meta":23,"style":23},"import asyncio\nimport nats\nimport json\nfrom datetime import datetime\nimport random\n",[3061,32117,32118,32124,32130,32136,32148],{"__ignoreMap":23},[1291,32119,32120,32122],{"class":3427,"line":3428},[1291,32121,3476],{"class":3475},[1291,32123,29848],{"class":3431},[1291,32125,32126,32128],{"class":3427,"line":24},[1291,32127,3476],{"class":3475},[1291,32129,29855],{"class":3431},[1291,32131,32132,32134],{"class":3427,"line":675},[1291,32133,3476],{"class":3475},[1291,32135,28159],{"class":3431},[1291,32137,32138,32140,32143,32145],{"class":3427,"line":3542},[1291,32139,3550],{"class":3475},[1291,32141,32142],{"class":3431}," datetime ",[1291,32144,3476],{"class":3475},[1291,32146,32147],{"class":3431}," datetime\n",[1291,32149,32150,32152],{"class":3427,"line":3547},[1291,32151,3476],{"class":3475},[1291,32153,32154],{"class":3431}," random\n",[665,32156,32157],{"start":24},[148,32158,32159,4390],{},[169,32160,32161],{},"Define an asynchronous function to publish telemetry data",[3418,32163,32165],{"className":3420,"code":32164,"language":3422,"meta":23,"style":23},"async def publish_telemetry():\n",[3061,32166,32167],{"__ignoreMap":23},[1291,32168,32169,32171,32173,32176],{"class":3427,"line":3428},[1291,32170,9369],{"class":7739},[1291,32172,9372],{"class":7739},[1291,32174,32175],{"class":3812}," 
publish_telemetry",[1291,32177,27496],{"class":3435},[665,32179,32180],{"start":675},[148,32181,32182,4390],{},[169,32183,29888],{},[3418,32185,32186],{"className":3420,"code":29902,"language":3422,"meta":23,"style":23},[3061,32187,32188],{"__ignoreMap":23},[1291,32189,32190,32192,32194,32196,32198,32200,32202,32204,32206,32208,32210],{"class":3427,"line":3428},[1291,32191,29909],{"class":3431},[1291,32193,3738],{"class":3435},[1291,32195,9511],{"class":3475},[1291,32197,29916],{"class":3431},[1291,32199,694],{"class":3435},[1291,32201,29921],{"class":3812},[1291,32203,3816],{"class":3435},[1291,32205,3691],{"class":3435},[1291,32207,29928],{"class":3439},[1291,32209,3691],{"class":3435},[1291,32211,3827],{"class":3435},[665,32213,32214],{"start":3542},[148,32215,32216,4390],{},[169,32217,32218],{},"Simulate a list of vehicle IDs",[3418,32220,32222],{"className":3420,"code":32221,"language":3422,"meta":23,"style":23},"vehicle_ids = [f\"TRUCK-{i}\" for i in range(1, 6)]  # Simulate 5 trucks\n",[3061,32223,32224],{"__ignoreMap":23},[1291,32225,32226,32229,32231,32233,32235,32238,32240,32242,32244,32246,32248,32250,32252,32254,32256,32258,32260,32263,32265],{"class":3427,"line":3428},[1291,32227,32228],{"class":3431},"vehicle_ids ",[1291,32230,3738],{"class":3435},[1291,32232,4145],{"class":3435},[1291,32234,9643],{"class":7739},[1291,32236,32237],{"class":3439},"\"TRUCK-",[1291,32239,8770],{"class":3451},[1291,32241,25164],{"class":3431},[1291,32243,9671],{"class":3451},[1291,32245,3691],{"class":3439},[1291,32247,9560],{"class":3475},[1291,32249,27972],{"class":3431},[1291,32251,9566],{"class":3475},[1291,32253,24485],{"class":3812},[1291,32255,3816],{"class":3435},[1291,32257,24626],{"class":3451},[1291,32259,3566],{"class":3435},[1291,32261,32262],{"class":3451}," 6",[1291,32264,24316],{"class":3435},[1291,32266,32267],{"class":3673},"  # Simulate 5 trucks\n",[665,32269,32270],{"start":3547},[148,32271,32272,4390],{},[169,32273,32274],{},"Start an infinite loop to 
publish data periodically",[3418,32276,32278],{"className":3420,"code":32277,"language":3422,"meta":23,"style":23},"while True:\n",[3061,32279,32280],{"__ignoreMap":23},[1291,32281,32282,32284],{"class":3427,"line":3428},[1291,32283,30517],{"class":3475},[1291,32285,30941],{"class":3435},[665,32287,32288],{"start":3572},[148,32289,32290,4390],{},[169,32291,32292],{},"Generate random telemetry data",[3418,32294,32296],{"className":3420,"code":32295,"language":3422,"meta":23,"style":23},"telemetry = {\n    \"vehicle_id\": random.choice(vehicle_ids),\n    \"timestamp\": datetime.utcnow().isoformat(),\n    \"lat\": random.uniform(34.0, 35.0),\n    \"lon\": random.uniform(-118.0, -117.0),\n    \"engine_temp\": random.randint(70, 120),  # Critical if >100\n    \"fuel_level\": random.randint(10, 100),   # Critical if \u003C20\n    \"brake_health\": random.randint(50, 100)  # Critical if \u003C60\n}\n",[3061,32297,32298,32307,32333,32359,32389,32421,32454,32485,32515],{"__ignoreMap":23},[1291,32299,32300,32303,32305],{"class":3427,"line":3428},[1291,32301,32302],{"class":3431},"telemetry ",[1291,32304,3738],{"class":3435},[1291,32306,4377],{"class":3435},[1291,32308,32309,32311,32314,32316,32318,32321,32323,32326,32328,32331],{"class":3427,"line":24},[1291,32310,4382],{"class":3435},[1291,32312,32313],{"class":3439},"vehicle_id",[1291,32315,3691],{"class":3435},[1291,32317,4390],{"class":3435},[1291,32319,32320],{"class":3431}," random",[1291,32322,694],{"class":3435},[1291,32324,32325],{"class":3812},"choice",[1291,32327,3816],{"class":3435},[1291,32329,32330],{"class":3812},"vehicle_ids",[1291,32332,4242],{"class":3435},[1291,32334,32335,32337,32340,32342,32344,32347,32349,32352,32354,32357],{"class":3427,"line":675},[1291,32336,4382],{"class":3435},[1291,32338,32339],{"class":3439},"timestamp",[1291,32341,3691],{"class":3435},[1291,32343,4390],{"class":3435},[1291,32345,32346],{"class":3431}," 
datetime",[1291,32348,694],{"class":3435},[1291,32350,32351],{"class":3812},"utcnow",[1291,32353,10341],{"class":3435},[1291,32355,32356],{"class":3812},"isoformat",[1291,32358,12703],{"class":3435},[1291,32360,32361,32363,32366,32368,32370,32372,32374,32377,32379,32382,32384,32387],{"class":3427,"line":3542},[1291,32362,4382],{"class":3435},[1291,32364,32365],{"class":3439},"lat",[1291,32367,3691],{"class":3435},[1291,32369,4390],{"class":3435},[1291,32371,32320],{"class":3431},[1291,32373,694],{"class":3435},[1291,32375,32376],{"class":3812},"uniform",[1291,32378,3816],{"class":3435},[1291,32380,32381],{"class":3451},"34.0",[1291,32383,3566],{"class":3435},[1291,32385,32386],{"class":3451}," 35.0",[1291,32388,4242],{"class":3435},[1291,32390,32391,32393,32396,32398,32400,32402,32404,32406,32409,32412,32414,32416,32419],{"class":3427,"line":3547},[1291,32392,4382],{"class":3435},[1291,32394,32395],{"class":3439},"lon",[1291,32397,3691],{"class":3435},[1291,32399,4390],{"class":3435},[1291,32401,32320],{"class":3431},[1291,32403,694],{"class":3435},[1291,32405,32376],{"class":3812},[1291,32407,32408],{"class":3435},"(-",[1291,32410,32411],{"class":3451},"118.0",[1291,32413,3566],{"class":3435},[1291,32415,5838],{"class":3435},[1291,32417,32418],{"class":3451},"117.0",[1291,32420,4242],{"class":3435},[1291,32422,32423,32425,32428,32430,32432,32434,32436,32439,32441,32444,32446,32449,32451],{"class":3427,"line":3572},[1291,32424,4382],{"class":3435},[1291,32426,32427],{"class":3439},"engine_temp",[1291,32429,3691],{"class":3435},[1291,32431,4390],{"class":3435},[1291,32433,32320],{"class":3431},[1291,32435,694],{"class":3435},[1291,32437,32438],{"class":3812},"randint",[1291,32440,3816],{"class":3435},[1291,32442,32443],{"class":3451},"70",[1291,32445,3566],{"class":3435},[1291,32447,32448],{"class":3451}," 120",[1291,32450,5639],{"class":3435},[1291,32452,32453],{"class":3673},"  # Critical if 
>100\n",[1291,32455,32456,32458,32461,32463,32465,32467,32469,32471,32473,32475,32477,32480,32482],{"class":3427,"line":3614},[1291,32457,4382],{"class":3435},[1291,32459,32460],{"class":3439},"fuel_level",[1291,32462,3691],{"class":3435},[1291,32464,4390],{"class":3435},[1291,32466,32320],{"class":3431},[1291,32468,694],{"class":3435},[1291,32470,32438],{"class":3812},[1291,32472,3816],{"class":3435},[1291,32474,6769],{"class":3451},[1291,32476,3566],{"class":3435},[1291,32478,32479],{"class":3451}," 100",[1291,32481,5639],{"class":3435},[1291,32483,32484],{"class":3673},"   # Critical if \u003C20\n",[1291,32486,32487,32489,32492,32494,32496,32498,32500,32502,32504,32506,32508,32510,32512],{"class":3427,"line":3640},[1291,32488,4382],{"class":3435},[1291,32490,32491],{"class":3439},"brake_health",[1291,32493,3691],{"class":3435},[1291,32495,4390],{"class":3435},[1291,32497,32320],{"class":3431},[1291,32499,694],{"class":3435},[1291,32501,32438],{"class":3812},[1291,32503,3816],{"class":3435},[1291,32505,16804],{"class":3451},[1291,32507,3566],{"class":3435},[1291,32509,32479],{"class":3451},[1291,32511,713],{"class":3435},[1291,32513,32514],{"class":3673},"  # Critical if \u003C60\n",[1291,32516,32517],{"class":3427,"line":3665},[1291,32518,4441],{"class":3435},[665,32520,32521],{"start":3614},[148,32522,32523,4390],{},[169,32524,32525,32526,32529],{},"Publish the telemetry data to the ",[3061,32527,32528],{},"'fleet.telemetry'"," subject",[3418,32531,32533],{"className":3420,"code":32532,"language":3422,"meta":23,"style":23},"await nc.publish(\"fleet.telemetry\", 
json.dumps(telemetry).encode())\n",[3061,32534,32535],{"__ignoreMap":23},[1291,32536,32537,32539,32541,32543,32545,32547,32549,32551,32553,32555,32558,32560,32563,32565,32568,32570,32573],{"class":3427,"line":3428},[1291,32538,9782],{"class":3475},[1291,32540,29968],{"class":3431},[1291,32542,694],{"class":3435},[1291,32544,29973],{"class":3812},[1291,32546,3816],{"class":3435},[1291,32548,3691],{"class":3435},[1291,32550,32034],{"class":3439},[1291,32552,3691],{"class":3435},[1291,32554,3566],{"class":3435},[1291,32556,32557],{"class":3812}," json",[1291,32559,694],{"class":3435},[1291,32561,32562],{"class":3812},"dumps",[1291,32564,3816],{"class":3435},[1291,32566,32567],{"class":3812},"telemetry",[1291,32569,19244],{"class":3435},[1291,32571,32572],{"class":3812},"encode",[1291,32574,6237],{"class":3435},[73,32576,32577],{},"Convert the telemetry data to a JSON string and encode it to bytes.",[665,32579,32580],{"start":3640},[148,32581,32582,4390],{},[169,32583,32584],{},"Wait for a second before publishing the next data point",[3418,32586,32588],{"className":3420,"code":32587,"language":3422,"meta":23,"style":23},"await 
asyncio.sleep(1)\n",[3061,32589,32590],{"__ignoreMap":23},[1291,32591,32592,32594,32596,32598,32600,32602,32604],{"class":3427,"line":3428},[1291,32593,9782],{"class":3475},[1291,32595,30529],{"class":3431},[1291,32597,694],{"class":3435},[1291,32599,30534],{"class":3812},[1291,32601,3816],{"class":3435},[1291,32603,24626],{"class":3451},[1291,32605,3827],{"class":3435},[665,32607,32608],{"start":3665},[148,32609,32610,4390],{},[169,32611,30049],{},[3418,32613,32615],{"className":3420,"code":32614,"language":3422,"meta":23,"style":23},"asyncio.run(publish_telemetry())\n",[3061,32616,32617],{"__ignoreMap":23},[1291,32618,32619,32621,32623,32625,32627,32630],{"class":3427,"line":3428},[1291,32620,29831],{"class":3431},[1291,32622,694],{"class":3435},[1291,32624,11274],{"class":3812},[1291,32626,3816],{"class":3435},[1291,32628,32629],{"class":3812},"publish_telemetry",[1291,32631,6237],{"class":3435},[3189,32633,32635],{"id":32634},"complete-telemetry-publisher-code-telemetry_publisherpy","Complete Telemetry Publisher Code (telemetry_publisher.py):",[3418,32637,32640],{"className":3420,"code":32638,"filename":32639,"language":3422,"meta":23,"style":23},"import asyncio\nimport nats\nimport json\nfrom datetime import datetime\nimport random\n\nasync def publish_telemetry():\n    # Connect to the NATS server\n    nc = await nats.connect(\"nats:\u002F\u002Flocalhost:4222\")\n    vehicle_ids = [f\"TRUCK-{i}\" for i in range(1, 6)]  # Simulate 5 trucks\n\n    while True:\n        # Generate random telemetry data\n        telemetry = {\n            \"vehicle_id\": random.choice(vehicle_ids),\n            \"timestamp\": datetime.utcnow().isoformat(),\n            \"lat\": random.uniform(34.0, 35.0),\n            \"lon\": random.uniform(-118.0, -117.0),\n            \"engine_temp\": random.randint(70, 120),  # Critical if >100\n            \"fuel_level\": random.randint(10, 100),   # Critical if \u003C20\n            \"brake_health\": random.randint(50, 100)  # Critical if 
\u003C60\n        }\n        # Publish telemetry data as JSON\n        await nc.publish(\"fleet.telemetry\", json.dumps(telemetry).encode())\n        await asyncio.sleep(1)\n\n# Run the asynchronous function\nasyncio.run(publish_telemetry())\n","telemetry_publisher.py",[3061,32641,32642,32648,32654,32660,32670,32676,32680,32690,32694,32718,32759,32763,32769,32774,32783,32806,32828,32854,32882,32910,32938,32966,32971,32976,33012,33028,33032,33036],{"__ignoreMap":23},[1291,32643,32644,32646],{"class":3427,"line":3428},[1291,32645,3476],{"class":3475},[1291,32647,29848],{"class":3431},[1291,32649,32650,32652],{"class":3427,"line":24},[1291,32651,3476],{"class":3475},[1291,32653,29855],{"class":3431},[1291,32655,32656,32658],{"class":3427,"line":675},[1291,32657,3476],{"class":3475},[1291,32659,28159],{"class":3431},[1291,32661,32662,32664,32666,32668],{"class":3427,"line":3542},[1291,32663,3550],{"class":3475},[1291,32665,32142],{"class":3431},[1291,32667,3476],{"class":3475},[1291,32669,32147],{"class":3431},[1291,32671,32672,32674],{"class":3427,"line":3547},[1291,32673,3476],{"class":3475},[1291,32675,32154],{"class":3431},[1291,32677,32678],{"class":3427,"line":3572},[1291,32679,3526],{"emptyLinePlaceholder":35},[1291,32681,32682,32684,32686,32688],{"class":3427,"line":3614},[1291,32683,9369],{"class":7739},[1291,32685,9372],{"class":7739},[1291,32687,32175],{"class":3812},[1291,32689,27496],{"class":3435},[1291,32691,32692],{"class":3427,"line":3640},[1291,32693,30120],{"class":3673},[1291,32695,32696,32698,32700,32702,32704,32706,32708,32710,32712,32714,32716],{"class":3427,"line":3665},[1291,32697,30125],{"class":3431},[1291,32699,3738],{"class":3435},[1291,32701,9511],{"class":3475},[1291,32703,29916],{"class":3431},[1291,32705,694],{"class":3435},[1291,32707,29921],{"class":3812},[1291,32709,3816],{"class":3435},[1291,32711,3691],{"class":3435},[1291,32713,29928],{"class":3439},[1291,32715,3691],{"class":3435},[1291,32717,3827],{"class":3435},[1291,32719,32720
,32723,32725,32727,32729,32731,32733,32735,32737,32739,32741,32743,32745,32747,32749,32751,32753,32755,32757],{"class":3427,"line":3670},[1291,32721,32722],{"class":3431},"    vehicle_ids ",[1291,32724,3738],{"class":3435},[1291,32726,4145],{"class":3435},[1291,32728,9643],{"class":7739},[1291,32730,32237],{"class":3439},[1291,32732,8770],{"class":3451},[1291,32734,25164],{"class":3431},[1291,32736,9671],{"class":3451},[1291,32738,3691],{"class":3439},[1291,32740,9560],{"class":3475},[1291,32742,27972],{"class":3431},[1291,32744,9566],{"class":3475},[1291,32746,24485],{"class":3812},[1291,32748,3816],{"class":3435},[1291,32750,24626],{"class":3451},[1291,32752,3566],{"class":3435},[1291,32754,32262],{"class":3451},[1291,32756,24316],{"class":3435},[1291,32758,32267],{"class":3673},[1291,32760,32761],{"class":3427,"line":3677},[1291,32762,3526],{"emptyLinePlaceholder":35},[1291,32764,32765,32767],{"class":3427,"line":3877},[1291,32766,30938],{"class":3475},[1291,32768,30941],{"class":3435},[1291,32770,32771],{"class":3427,"line":3916},[1291,32772,32773],{"class":3673},"        # Generate random telemetry data\n",[1291,32775,32776,32779,32781],{"class":3427,"line":4519},[1291,32777,32778],{"class":3431},"        telemetry ",[1291,32780,3738],{"class":3435},[1291,32782,4377],{"class":3435},[1291,32784,32785,32788,32790,32792,32794,32796,32798,32800,32802,32804],{"class":3427,"line":6038},[1291,32786,32787],{"class":3435},"            
\"",[1291,32789,32313],{"class":3439},[1291,32791,3691],{"class":3435},[1291,32793,4390],{"class":3435},[1291,32795,32320],{"class":3431},[1291,32797,694],{"class":3435},[1291,32799,32325],{"class":3812},[1291,32801,3816],{"class":3435},[1291,32803,32330],{"class":3812},[1291,32805,4242],{"class":3435},[1291,32807,32808,32810,32812,32814,32816,32818,32820,32822,32824,32826],{"class":3427,"line":6043},[1291,32809,32787],{"class":3435},[1291,32811,32339],{"class":3439},[1291,32813,3691],{"class":3435},[1291,32815,4390],{"class":3435},[1291,32817,32346],{"class":3431},[1291,32819,694],{"class":3435},[1291,32821,32351],{"class":3812},[1291,32823,10341],{"class":3435},[1291,32825,32356],{"class":3812},[1291,32827,12703],{"class":3435},[1291,32829,32830,32832,32834,32836,32838,32840,32842,32844,32846,32848,32850,32852],{"class":3427,"line":6066},[1291,32831,32787],{"class":3435},[1291,32833,32365],{"class":3439},[1291,32835,3691],{"class":3435},[1291,32837,4390],{"class":3435},[1291,32839,32320],{"class":3431},[1291,32841,694],{"class":3435},[1291,32843,32376],{"class":3812},[1291,32845,3816],{"class":3435},[1291,32847,32381],{"class":3451},[1291,32849,3566],{"class":3435},[1291,32851,32386],{"class":3451},[1291,32853,4242],{"class":3435},[1291,32855,32856,32858,32860,32862,32864,32866,32868,32870,32872,32874,32876,32878,32880],{"class":3427,"line":6078},[1291,32857,32787],{"class":3435},[1291,32859,32395],{"class":3439},[1291,32861,3691],{"class":3435},[1291,32863,4390],{"class":3435},[1291,32865,32320],{"class":3431},[1291,32867,694],{"class":3435},[1291,32869,32376],{"class":3812},[1291,32871,32408],{"class":3435},[1291,32873,32411],{"class":3451},[1291,32875,3566],{"class":3435},[1291,32877,5838],{"class":3435},[1291,32879,32418],{"class":3451},[1291,32881,4242],{"class":3435},[1291,32883,32884,32886,32888,32890,32892,32894,32896,32898,32900,32902,32904,32906,32908],{"class":3427,"line":6089},[1291,32885,32787],{"class":3435},[1291,32887,32427],{"class":3439},[1291,32
889,3691],{"class":3435},[1291,32891,4390],{"class":3435},[1291,32893,32320],{"class":3431},[1291,32895,694],{"class":3435},[1291,32897,32438],{"class":3812},[1291,32899,3816],{"class":3435},[1291,32901,32443],{"class":3451},[1291,32903,3566],{"class":3435},[1291,32905,32448],{"class":3451},[1291,32907,5639],{"class":3435},[1291,32909,32453],{"class":3673},[1291,32911,32912,32914,32916,32918,32920,32922,32924,32926,32928,32930,32932,32934,32936],{"class":3427,"line":6124},[1291,32913,32787],{"class":3435},[1291,32915,32460],{"class":3439},[1291,32917,3691],{"class":3435},[1291,32919,4390],{"class":3435},[1291,32921,32320],{"class":3431},[1291,32923,694],{"class":3435},[1291,32925,32438],{"class":3812},[1291,32927,3816],{"class":3435},[1291,32929,6769],{"class":3451},[1291,32931,3566],{"class":3435},[1291,32933,32479],{"class":3451},[1291,32935,5639],{"class":3435},[1291,32937,32484],{"class":3673},[1291,32939,32940,32942,32944,32946,32948,32950,32952,32954,32956,32958,32960,32962,32964],{"class":3427,"line":6133},[1291,32941,32787],{"class":3435},[1291,32943,32491],{"class":3439},[1291,32945,3691],{"class":3435},[1291,32947,4390],{"class":3435},[1291,32949,32320],{"class":3431},[1291,32951,694],{"class":3435},[1291,32953,32438],{"class":3812},[1291,32955,3816],{"class":3435},[1291,32957,16804],{"class":3451},[1291,32959,3566],{"class":3435},[1291,32961,32479],{"class":3451},[1291,32963,713],{"class":3435},[1291,32965,32514],{"class":3673},[1291,32967,32968],{"class":3427,"line":6141},[1291,32969,32970],{"class":3435},"        }\n",[1291,32972,32973],{"class":3427,"line":6151},[1291,32974,32975],{"class":3673},"        # Publish telemetry data as 
JSON\n",[1291,32977,32978,32980,32982,32984,32986,32988,32990,32992,32994,32996,32998,33000,33002,33004,33006,33008,33010],{"class":3427,"line":6923},[1291,32979,30946],{"class":3475},[1291,32981,29968],{"class":3431},[1291,32983,694],{"class":3435},[1291,32985,29973],{"class":3812},[1291,32987,3816],{"class":3435},[1291,32989,3691],{"class":3435},[1291,32991,32034],{"class":3439},[1291,32993,3691],{"class":3435},[1291,32995,3566],{"class":3435},[1291,32997,32557],{"class":3812},[1291,32999,694],{"class":3435},[1291,33001,32562],{"class":3812},[1291,33003,3816],{"class":3435},[1291,33005,32567],{"class":3812},[1291,33007,19244],{"class":3435},[1291,33009,32572],{"class":3812},[1291,33011,6237],{"class":3435},[1291,33013,33014,33016,33018,33020,33022,33024,33026],{"class":3427,"line":6928},[1291,33015,30946],{"class":3475},[1291,33017,30529],{"class":3431},[1291,33019,694],{"class":3435},[1291,33021,30534],{"class":3812},[1291,33023,3816],{"class":3435},[1291,33025,24626],{"class":3451},[1291,33027,3827],{"class":3435},[1291,33029,33030],{"class":3427,"line":6934},[1291,33031,3526],{"emptyLinePlaceholder":35},[1291,33033,33034],{"class":3427,"line":6940},[1291,33035,30230],{"class":3673},[1291,33037,33038,33040,33042,33044,33046,33048],{"class":3427,"line":6952},[1291,33039,29831],{"class":3431},[1291,33041,694],{"class":3435},[1291,33043,11274],{"class":3812},[1291,33045,3816],{"class":3435},[1291,33047,32629],{"class":3812},[1291,33049,6237],{"class":3435},[140,33051,33053],{"id":33052},"processing-data-with-pathway-live-data-framework-anomaly-detection-script","Processing Data with Pathway Live Data Framework Anomaly Detection Script",[73,33055,33056,33057,33059,33060,31877],{},"Now that data is flowing into the ",[3061,33058,32034],{}," subject in NATS, it’s time for you to process that data and identify anomalies, as discussed in the proposed architecture above. 
This step will detect any anomaly in the telemetry data and push it to a ",[3061,33061,32053],{},[73,33063,33064],{},"If any of the criteria below is met for any truck in your fleet, it’s an anomaly and an alert must be raised immediately:",[145,33066,33067,33070,33073],{},[148,33068,33069],{},"Engine Temperature > 100",[148,33071,33072],{},"Fuel Level \u003C 20",[148,33074,33075],{},"Brake Health \u003C 60",[73,33077,33078],{},"You have to write code using Pathway Live Data Framework to detect these anomalies and push alerts back to the NATS server.",[73,33080,33081],{},"Follow the steps to write the Pathway anomaly detection script:",[665,33083,33084],{},[148,33085,33086,4390],{},[169,33087,31354],{},[3418,33089,33090],{"className":3420,"code":31357,"language":3422,"meta":23,"style":23},[3061,33091,33092],{"__ignoreMap":23},[1291,33093,33094,33096,33098,33100],{"class":3427,"line":3428},[1291,33095,3476],{"class":3475},[1291,33097,3533],{"class":3431},[1291,33099,3536],{"class":3475},[1291,33101,3539],{"class":3431},[665,33103,33104],{"start":24},[148,33105,33106,4390],{},[169,33107,33108],{},"Define the telemetry schema",[3418,33110,33112],{"className":3420,"code":33111,"language":3422,"meta":23,"style":23},"class TelemetrySchema(pw.Schema):\n    vehicle_id: str\n    timestamp: str\n    lat: float\n    lon: float\n    engine_temp: int\n    fuel_level: int\n    brake_health: int\n",[3061,33113,33114,33131,33140,33148,33157,33166,33175,33184],{"__ignoreMap":23},[1291,33115,33116,33118,33121,33123,33125,33127,33129],{"class":3427,"line":3428},[1291,33117,16356],{"class":7739},[1291,33119,33120],{"class":6356}," TelemetrySchema",[1291,33122,3816],{"class":3435},[1291,33124,3841],{"class":6356},[1291,33126,694],{"class":3435},[1291,33128,16368],{"class":6356},[1291,33130,11948],{"class":3435},[1291,33132,33133,33136,33138],{"class":3427,"line":24},[1291,33134,33135],{"class":3431},"    
vehicle_id",[1291,33137,4390],{"class":3435},[1291,33139,16380],{"class":6356},[1291,33141,33142,33144,33146],{"class":3427,"line":675},[1291,33143,18017],{"class":3431},[1291,33145,4390],{"class":3435},[1291,33147,16380],{"class":6356},[1291,33149,33150,33153,33155],{"class":3427,"line":3542},[1291,33151,33152],{"class":3431},"    lat",[1291,33154,4390],{"class":3435},[1291,33156,17994],{"class":6356},[1291,33158,33159,33162,33164],{"class":3427,"line":3547},[1291,33160,33161],{"class":3431},"    lon",[1291,33163,4390],{"class":3435},[1291,33165,17994],{"class":6356},[1291,33167,33168,33171,33173],{"class":3427,"line":3572},[1291,33169,33170],{"class":3431},"    engine_temp",[1291,33172,4390],{"class":3435},[1291,33174,18022],{"class":6356},[1291,33176,33177,33180,33182],{"class":3427,"line":3614},[1291,33178,33179],{"class":3431},"    fuel_level",[1291,33181,4390],{"class":3435},[1291,33183,18022],{"class":6356},[1291,33185,33186,33189,33191],{"class":3427,"line":3640},[1291,33187,33188],{"class":3431},"    brake_health",[1291,33190,4390],{"class":3435},[1291,33192,18022],{"class":6356},[73,33194,33195],{},"This schema maps the JSON fields to table columns.",[665,33197,33198],{"start":675},[148,33199,33200,33203],{},[169,33201,33202],{},"Ingest telemetry data from NATS",":\nMessages are read from the fleet.telemetry subject. format=\"json\" specifies that the messages are in JSON format. 
The schema parameter tells Pathway how to parse the JSON data.",[3418,33205,33207],{"className":3420,"code":33206,"language":3422,"meta":23,"style":23},"telemetry_table = pw.io.nats.read(\n    uri=\"nats:\u002F\u002F127.0.0.1:4222\",\n    topic=\"fleet.telemetry\",\n    format=\"json\",\n    schema=TelemetrySchema\n)\n",[3061,33208,33209,33232,33246,33260,33274,33283],{"__ignoreMap":23},[1291,33210,33211,33214,33216,33218,33220,33222,33224,33226,33228,33230],{"class":3427,"line":3428},[1291,33212,33213],{"class":3431},"telemetry_table ",[1291,33215,3738],{"class":3435},[1291,33217,4073],{"class":3431},[1291,33219,694],{"class":3435},[1291,33221,4078],{"class":3457},[1291,33223,694],{"class":3435},[1291,33225,29835],{"class":3457},[1291,33227,694],{"class":3435},[1291,33229,4088],{"class":3812},[1291,33231,3874],{"class":3435},[1291,33233,33234,33236,33238,33240,33242,33244],{"class":3427,"line":24},[1291,33235,31409],{"class":3819},[1291,33237,3738],{"class":3435},[1291,33239,3691],{"class":3435},[1291,33241,31416],{"class":3439},[1291,33243,3691],{"class":3435},[1291,33245,4107],{"class":3435},[1291,33247,33248,33250,33252,33254,33256,33258],{"class":3427,"line":675},[1291,33249,31425],{"class":3819},[1291,33251,3738],{"class":3435},[1291,33253,3691],{"class":3435},[1291,33255,32034],{"class":3439},[1291,33257,3691],{"class":3435},[1291,33259,4107],{"class":3435},[1291,33261,33262,33264,33266,33268,33270,33272],{"class":3427,"line":3542},[1291,33263,4112],{"class":3819},[1291,33265,3738],{"class":3435},[1291,33267,3691],{"class":3435},[1291,33269,8623],{"class":3439},[1291,33271,3691],{"class":3435},[1291,33273,4107],{"class":3435},[1291,33275,33276,33278,33280],{"class":3427,"line":3547},[1291,33277,18107],{"class":3819},[1291,33279,3738],{"class":3435},[1291,33281,33282],{"class":3812},"TelemetrySchema\n",[1291,33284,33285],{"class":3427,"line":3572},[1291,33286,3827],{"class":3435},[665,33288,33289],{"start":3542},[148,33290,33291,4390],{},[169,33292,33293],{},"
Define a User-Defined Function (UDF) for detecting alerts",[73,33295,33296,33297],{},"You can define a UDF to encapsulate the logic for detecting multiple alerts per data point. This allows you to check all the conditions and generate multiple alerts if necessary. Refer to this link to read more about UDFs in Pathway Live Data Framework: ",[77,33298,33299],{"href":33299,"rel":33300},"https:\u002F\u002Fpathway.com\u002Fdevelopers\u002Fuser-guide\u002Fdata-transformation\u002Fuser-defined-functions",[81],[3418,33302,33304],{"className":3420,"code":33303,"language":3422,"meta":23,"style":23},"@pw.udf\ndef detect_alerts(engine_temp, fuel_level, brake_health):\n    alerts = []\n    if engine_temp > 100:\n        alerts.append(\"High Engine Temp\")\n    if fuel_level \u003C 20:\n        alerts.append(\"Low Fuel Level\")\n    if brake_health \u003C 60:\n        alerts.append(\"Poor Brake Health\")\n    return alerts\n",[3061,33305,33306,33317,33340,33349,33363,33383,33398,33417,33431,33450],{"__ignoreMap":23},[1291,33307,33308,33310,33312,33314],{"class":3427,"line":3428},[1291,33309,19361],{"class":3435},[1291,33311,3841],{"class":3812},[1291,33313,694],{"class":3435},[1291,33315,33316],{"class":3812},"udf\n",[1291,33318,33319,33321,33324,33326,33328,33330,33333,33335,33338],{"class":3427,"line":24},[1291,33320,11398],{"class":7739},[1291,33322,33323],{"class":3812}," detect_alerts",[1291,33325,3816],{"class":3435},[1291,33327,32427],{"class":3819},[1291,33329,3566],{"class":3435},[1291,33331,33332],{"class":3819}," fuel_level",[1291,33334,3566],{"class":3435},[1291,33336,33337],{"class":3819}," brake_health",[1291,33339,11948],{"class":3435},[1291,33341,33342,33345,33347],{"class":3427,"line":675},[1291,33343,33344],{"class":3431},"    alerts ",[1291,33346,3738],{"class":3435},[1291,33348,6554],{"class":3435},[1291,33350,33351,33354,33357,33359,33361],{"class":3427,"line":3542},[1291,33352,33353],{"class":3475},"    if",[1291,33355,33356],{"class":3431}," engine_temp 
",[1291,33358,19679],{"class":3435},[1291,33360,32479],{"class":3451},[1291,33362,5243],{"class":3435},[1291,33364,33365,33368,33370,33372,33374,33376,33379,33381],{"class":3427,"line":3547},[1291,33366,33367],{"class":3431},"        alerts",[1291,33369,694],{"class":3435},[1291,33371,6564],{"class":3812},[1291,33373,3816],{"class":3435},[1291,33375,3691],{"class":3435},[1291,33377,33378],{"class":3439},"High Engine Temp",[1291,33380,3691],{"class":3435},[1291,33382,3827],{"class":3435},[1291,33384,33385,33387,33390,33393,33396],{"class":3427,"line":3572},[1291,33386,33353],{"class":3475},[1291,33388,33389],{"class":3431}," fuel_level ",[1291,33391,33392],{"class":3435},"\u003C",[1291,33394,33395],{"class":3451}," 20",[1291,33397,5243],{"class":3435},[1291,33399,33400,33402,33404,33406,33408,33410,33413,33415],{"class":3427,"line":3614},[1291,33401,33367],{"class":3431},[1291,33403,694],{"class":3435},[1291,33405,6564],{"class":3812},[1291,33407,3816],{"class":3435},[1291,33409,3691],{"class":3435},[1291,33411,33412],{"class":3439},"Low Fuel Level",[1291,33414,3691],{"class":3435},[1291,33416,3827],{"class":3435},[1291,33418,33419,33421,33424,33426,33429],{"class":3427,"line":3640},[1291,33420,33353],{"class":3475},[1291,33422,33423],{"class":3431}," brake_health ",[1291,33425,33392],{"class":3435},[1291,33427,33428],{"class":3451}," 60",[1291,33430,5243],{"class":3435},[1291,33432,33433,33435,33437,33439,33441,33443,33446,33448],{"class":3427,"line":3665},[1291,33434,33367],{"class":3431},[1291,33436,694],{"class":3435},[1291,33438,6564],{"class":3812},[1291,33440,3816],{"class":3435},[1291,33442,3691],{"class":3435},[1291,33444,33445],{"class":3439},"Poor Brake Health",[1291,33447,3691],{"class":3435},[1291,33449,3827],{"class":3435},[1291,33451,33452,33454],{"class":3427,"line":3670},[1291,33453,11771],{"class":3475},[1291,33455,33456],{"class":3431}," alerts\n",[665,33458,33459],{"start":3547},[148,33460,33461,4390],{},[169,33462,33463],{},"Apply the UDF and 
generate multiple alerts",[73,33465,33466,33467,33470,33471,33474,33475,33478],{},"You apply the ",[3061,33468,33469],{},"detect_alerts"," UDF to each row in the ",[3061,33472,33473],{},"telemetry_table",". The UDF returns a list of alerts, which can contain multiple alert messages for each data point.\n",[3061,33476,33477],{},"select()"," is used to create new table alerts with the necessary fields.",[3418,33480,33482],{"className":3420,"code":33481,"language":3422,"meta":23,"style":23},"alerts = telemetry_table.select(\n    vehicle_id=pw.this.vehicle_id,\n    timestamp=pw.this.timestamp,\n    alert_type=detect_alerts(\n        pw.this.engine_temp,\n        pw.this.fuel_level,\n        pw.this.brake_health\n    )\n)\n",[3061,33483,33484,33500,33518,33536,33547,33562,33576,33589,33593],{"__ignoreMap":23},[1291,33485,33486,33489,33491,33494,33496,33498],{"class":3427,"line":3428},[1291,33487,33488],{"class":3431},"alerts ",[1291,33490,3738],{"class":3435},[1291,33492,33493],{"class":3431}," telemetry_table",[1291,33495,694],{"class":3435},[1291,33497,16571],{"class":3812},[1291,33499,3874],{"class":3435},[1291,33501,33502,33504,33506,33508,33510,33512,33514,33516],{"class":3427,"line":24},[1291,33503,33135],{"class":3819},[1291,33505,3738],{"class":3435},[1291,33507,3841],{"class":3812},[1291,33509,694],{"class":3435},[1291,33511,16845],{"class":3457},[1291,33513,694],{"class":3435},[1291,33515,32313],{"class":3457},[1291,33517,4107],{"class":3435},[1291,33519,33520,33522,33524,33526,33528,33530,33532,33534],{"class":3427,"line":675},[1291,33521,18017],{"class":3819},[1291,33523,3738],{"class":3435},[1291,33525,3841],{"class":3812},[1291,33527,694],{"class":3435},[1291,33529,16845],{"class":3457},[1291,33531,694],{"class":3435},[1291,33533,32339],{"class":3457},[1291,33535,4107],{"class":3435},[1291,33537,33538,33541,33543,33545],{"class":3427,"line":3542},[1291,33539,33540],{"class":3819},"    
alert_type",[1291,33542,3738],{"class":3435},[1291,33544,33469],{"class":3812},[1291,33546,3874],{"class":3435},[1291,33548,33549,33552,33554,33556,33558,33560],{"class":3427,"line":3547},[1291,33550,33551],{"class":3812},"        pw",[1291,33553,694],{"class":3435},[1291,33555,16845],{"class":3457},[1291,33557,694],{"class":3435},[1291,33559,32427],{"class":3457},[1291,33561,4107],{"class":3435},[1291,33563,33564,33566,33568,33570,33572,33574],{"class":3427,"line":3572},[1291,33565,33551],{"class":3812},[1291,33567,694],{"class":3435},[1291,33569,16845],{"class":3457},[1291,33571,694],{"class":3435},[1291,33573,32460],{"class":3457},[1291,33575,4107],{"class":3435},[1291,33577,33578,33580,33582,33584,33586],{"class":3427,"line":3614},[1291,33579,33551],{"class":3812},[1291,33581,694],{"class":3435},[1291,33583,16845],{"class":3457},[1291,33585,694],{"class":3435},[1291,33587,33588],{"class":3457},"brake_health\n",[1291,33590,33591],{"class":3427,"line":3640},[1291,33592,11996],{"class":3435},[1291,33594,33595],{"class":3427,"line":3665},[1291,33596,3827],{"class":3435},[665,33598,33599],{"start":3572},[148,33600,33601,4390],{},[169,33602,33603],{},"Flatten the alerts and filter out rows with no alerts",[73,33605,33606,33607,33610,33611,33613,33614,33617],{},"Since the ",[3061,33608,33609],{},"alert_type"," column now contains lists of alerts, you need to flatten it to have one alert per row. 
Then you can filter out any rows where ",[3061,33612,33609],{}," is ",[3061,33615,33616],{},"None"," or empty.",[3418,33619,33621],{"className":3420,"code":33620,"language":3422,"meta":23,"style":23},"alerts = alerts.flatten(pw.this.alert_type).filter(pw.this.alert_type.is_not_none())\n",[3061,33622,33623],{"__ignoreMap":23},[1291,33624,33625,33627,33629,33632,33634,33637,33639,33641,33643,33645,33647,33649,33651,33653,33655,33657,33659,33661,33663,33665,33667,33670],{"class":3427,"line":3428},[1291,33626,33488],{"class":3431},[1291,33628,3738],{"class":3435},[1291,33630,33631],{"class":3431}," alerts",[1291,33633,694],{"class":3435},[1291,33635,33636],{"class":3812},"flatten",[1291,33638,3816],{"class":3435},[1291,33640,3841],{"class":3812},[1291,33642,694],{"class":3435},[1291,33644,16845],{"class":3457},[1291,33646,694],{"class":3435},[1291,33648,33609],{"class":3457},[1291,33650,19244],{"class":3435},[1291,33652,18155],{"class":3812},[1291,33654,3816],{"class":3435},[1291,33656,3841],{"class":3812},[1291,33658,694],{"class":3435},[1291,33660,16845],{"class":3457},[1291,33662,694],{"class":3435},[1291,33664,33609],{"class":3457},[1291,33666,694],{"class":3435},[1291,33668,33669],{"class":3812},"is_not_none",[1291,33671,6237],{"class":3435},[665,33673,33674],{"start":3614},[148,33675,33676,4390],{},[169,33677,33678],{},"Output alerts to another NATS subject",[73,33680,33681],{},"Alerts are then published to the fleet.alerts subject in JSON format.",[3418,33683,33685],{"className":3420,"code":33684,"language":3422,"meta":23,"style":23},"pw.io.nats.write(\n    alerts,\n    uri=\"nats:\u002F\u002F127.0.0.1:4222\",\n    topic=\"fleet.alerts\",\n    
format=\"json\"\n)\n",[3061,33686,33687,33705,33712,33726,33740,33752],{"__ignoreMap":23},[1291,33688,33689,33691,33693,33695,33697,33699,33701,33703],{"class":3427,"line":3428},[1291,33690,3841],{"class":3431},[1291,33692,694],{"class":3435},[1291,33694,4078],{"class":3457},[1291,33696,694],{"class":3435},[1291,33698,29835],{"class":3457},[1291,33700,694],{"class":3435},[1291,33702,9700],{"class":3812},[1291,33704,3874],{"class":3435},[1291,33706,33707,33710],{"class":3427,"line":24},[1291,33708,33709],{"class":3812},"    alerts",[1291,33711,4107],{"class":3435},[1291,33713,33714,33716,33718,33720,33722,33724],{"class":3427,"line":675},[1291,33715,31409],{"class":3819},[1291,33717,3738],{"class":3435},[1291,33719,3691],{"class":3435},[1291,33721,31416],{"class":3439},[1291,33723,3691],{"class":3435},[1291,33725,4107],{"class":3435},[1291,33727,33728,33730,33732,33734,33736,33738],{"class":3427,"line":3542},[1291,33729,31425],{"class":3819},[1291,33731,3738],{"class":3435},[1291,33733,3691],{"class":3435},[1291,33735,32053],{"class":3439},[1291,33737,3691],{"class":3435},[1291,33739,4107],{"class":3435},[1291,33741,33742,33744,33746,33748,33750],{"class":3427,"line":3547},[1291,33743,4112],{"class":3819},[1291,33745,3738],{"class":3435},[1291,33747,3691],{"class":3435},[1291,33749,8623],{"class":3439},[1291,33751,3746],{"class":3435},[1291,33753,33754],{"class":3427,"line":3572},[1291,33755,3827],{"class":3435},[665,33757,33758],{"start":3640},[148,33759,33760,4390],{},[169,33761,31624],{},[3418,33763,33764],{"className":3420,"code":31627,"language":3422,"meta":23,"style":23},[3061,33765,33766],{"__ignoreMap":23},[1291,33767,33768,33770,33772,33774],{"class":3427,"line":3428},[1291,33769,3841],{"class":3431},[1291,33771,694],{"class":3435},[1291,33773,11274],{"class":3812},[1291,33775,4871],{"class":3435},[3189,33777,33779],{"id":33778},"complete-telemetry-processor-code-telemetry_processorpy","Complete Telemetry Processor Code 
(telemetry_processor.py)",[3418,33781,33784],{"className":3420,"code":33782,"filename":33783,"language":3422,"meta":23,"style":23},"import pathway as pw\n\n# Define the telemetry schema\nclass TelemetrySchema(pw.Schema):\n   vehicle_id: str\n   timestamp: str\n   lat: float\n   lon: float\n   engine_temp: int\n   fuel_level: int\n   brake_health: int\n\n# Ingest telemetry data from NATS\ntelemetry_table = pw.io.nats.read(\n   uri=\"nats:\u002F\u002F127.0.0.1:4222\",\n   topic=\"fleet.telemetry\",\n   format=\"json\",\n   schema=TelemetrySchema\n)\n\n# Define a UDF for detecting alerts with if conditions\n@pw.udf\ndef detect_alerts(engine_temp, fuel_level, brake_health):\n   alerts = []\n   if engine_temp > 100:\n       alerts.append(\"High Engine Temp\")\n   if fuel_level \u003C 20:\n       alerts.append(\"Low Fuel Level\")\n   if brake_health \u003C 60:\n       alerts.append(\"Poor Brake Health\")\n   return alerts\n\n# Apply the UDF and generate multiple alerts\nalerts = telemetry_table.select(\n   vehicle_id=pw.this.vehicle_id,\n   timestamp=pw.this.timestamp,\n   alert_type=detect_alerts(\n       pw.this.engine_temp,\n       pw.this.fuel_level,\n       pw.this.brake_health\n   )\n)\n\n# Filter rows with no alerts\nalerts = alerts.flatten(pw.this.alert_type).filter(pw.this.alert_type.is_not_none())\n\n# Output alerts to another NATS subject\npw.io.nats.write(\n   alerts.select(\n       vehicle_id=pw.this.vehicle_id,\n       timestamp=pw.this.timestamp,\n       alert_type=pw.this.alert_type\n   ),\n   uri=\"nats:\u002F\u002F127.0.0.1:4222\",\n   topic=\"fleet.alerts\",\n   format=\"json\"\n)\n\n# Run the Pathway 
pipeline\npw.run()\n","telemetry_processor.py",[3061,33785,33786,33796,33800,33805,33821,33830,33839,33848,33857,33866,33875,33884,33888,33893,33915,33930,33945,33960,33969,33973,33977,33982,33992,34012,34021,34034,34053,34065,34083,34095,34113,34120,34124,34129,34143,34161,34179,34190,34205,34219,34231,34236,34240,34244,34249,34295,34299,34304,34322,34333,34352,34371,34389,34394,34408,34422,34434,34438,34442,34447],{"__ignoreMap":23},[1291,33787,33788,33790,33792,33794],{"class":3427,"line":3428},[1291,33789,3476],{"class":3475},[1291,33791,3533],{"class":3431},[1291,33793,3536],{"class":3475},[1291,33795,3539],{"class":3431},[1291,33797,33798],{"class":3427,"line":24},[1291,33799,3526],{"emptyLinePlaceholder":35},[1291,33801,33802],{"class":3427,"line":675},[1291,33803,33804],{"class":3673},"# Define the telemetry schema\n",[1291,33806,33807,33809,33811,33813,33815,33817,33819],{"class":3427,"line":3542},[1291,33808,16356],{"class":7739},[1291,33810,33120],{"class":6356},[1291,33812,3816],{"class":3435},[1291,33814,3841],{"class":6356},[1291,33816,694],{"class":3435},[1291,33818,16368],{"class":6356},[1291,33820,11948],{"class":3435},[1291,33822,33823,33826,33828],{"class":3427,"line":3547},[1291,33824,33825],{"class":3431},"   vehicle_id",[1291,33827,4390],{"class":3435},[1291,33829,16380],{"class":6356},[1291,33831,33832,33835,33837],{"class":3427,"line":3572},[1291,33833,33834],{"class":3431},"   timestamp",[1291,33836,4390],{"class":3435},[1291,33838,16380],{"class":6356},[1291,33840,33841,33844,33846],{"class":3427,"line":3614},[1291,33842,33843],{"class":3431},"   lat",[1291,33845,4390],{"class":3435},[1291,33847,17994],{"class":6356},[1291,33849,33850,33853,33855],{"class":3427,"line":3640},[1291,33851,33852],{"class":3431},"   lon",[1291,33854,4390],{"class":3435},[1291,33856,17994],{"class":6356},[1291,33858,33859,33862,33864],{"class":3427,"line":3665},[1291,33860,33861],{"class":3431},"   
engine_temp",[1291,33863,4390],{"class":3435},[1291,33865,18022],{"class":6356},[1291,33867,33868,33871,33873],{"class":3427,"line":3670},[1291,33869,33870],{"class":3431},"   fuel_level",[1291,33872,4390],{"class":3435},[1291,33874,18022],{"class":6356},[1291,33876,33877,33880,33882],{"class":3427,"line":3677},[1291,33878,33879],{"class":3431},"   brake_health",[1291,33881,4390],{"class":3435},[1291,33883,18022],{"class":6356},[1291,33885,33886],{"class":3427,"line":3877},[1291,33887,3526],{"emptyLinePlaceholder":35},[1291,33889,33890],{"class":3427,"line":3916},[1291,33891,33892],{"class":3673},"# Ingest telemetry data from NATS\n",[1291,33894,33895,33897,33899,33901,33903,33905,33907,33909,33911,33913],{"class":3427,"line":4519},[1291,33896,33213],{"class":3431},[1291,33898,3738],{"class":3435},[1291,33900,4073],{"class":3431},[1291,33902,694],{"class":3435},[1291,33904,4078],{"class":3457},[1291,33906,694],{"class":3435},[1291,33908,29835],{"class":3457},[1291,33910,694],{"class":3435},[1291,33912,4088],{"class":3812},[1291,33914,3874],{"class":3435},[1291,33916,33917,33920,33922,33924,33926,33928],{"class":3427,"line":6038},[1291,33918,33919],{"class":3819},"   uri",[1291,33921,3738],{"class":3435},[1291,33923,3691],{"class":3435},[1291,33925,31416],{"class":3439},[1291,33927,3691],{"class":3435},[1291,33929,4107],{"class":3435},[1291,33931,33932,33935,33937,33939,33941,33943],{"class":3427,"line":6043},[1291,33933,33934],{"class":3819},"   topic",[1291,33936,3738],{"class":3435},[1291,33938,3691],{"class":3435},[1291,33940,32034],{"class":3439},[1291,33942,3691],{"class":3435},[1291,33944,4107],{"class":3435},[1291,33946,33947,33950,33952,33954,33956,33958],{"class":3427,"line":6066},[1291,33948,33949],{"class":3819},"   
format",[1291,33951,3738],{"class":3435},[1291,33953,3691],{"class":3435},[1291,33955,8623],{"class":3439},[1291,33957,3691],{"class":3435},[1291,33959,4107],{"class":3435},[1291,33961,33962,33965,33967],{"class":3427,"line":6078},[1291,33963,33964],{"class":3819},"   schema",[1291,33966,3738],{"class":3435},[1291,33968,33282],{"class":3812},[1291,33970,33971],{"class":3427,"line":6089},[1291,33972,3827],{"class":3435},[1291,33974,33975],{"class":3427,"line":6124},[1291,33976,3526],{"emptyLinePlaceholder":35},[1291,33978,33979],{"class":3427,"line":6133},[1291,33980,33981],{"class":3673},"# Define a UDF for detecting alerts with if conditions\n",[1291,33983,33984,33986,33988,33990],{"class":3427,"line":6141},[1291,33985,19361],{"class":3435},[1291,33987,3841],{"class":3812},[1291,33989,694],{"class":3435},[1291,33991,33316],{"class":3812},[1291,33993,33994,33996,33998,34000,34002,34004,34006,34008,34010],{"class":3427,"line":6151},[1291,33995,11398],{"class":7739},[1291,33997,33323],{"class":3812},[1291,33999,3816],{"class":3435},[1291,34001,32427],{"class":3819},[1291,34003,3566],{"class":3435},[1291,34005,33332],{"class":3819},[1291,34007,3566],{"class":3435},[1291,34009,33337],{"class":3819},[1291,34011,11948],{"class":3435},[1291,34013,34014,34017,34019],{"class":3427,"line":6923},[1291,34015,34016],{"class":3431},"   alerts ",[1291,34018,3738],{"class":3435},[1291,34020,6554],{"class":3435},[1291,34022,34023,34026,34028,34030,34032],{"class":3427,"line":6928},[1291,34024,34025],{"class":3475},"   if",[1291,34027,33356],{"class":3431},[1291,34029,19679],{"class":3435},[1291,34031,32479],{"class":3451},[1291,34033,5243],{"class":3435},[1291,34035,34036,34039,34041,34043,34045,34047,34049,34051],{"class":3427,"line":6934},[1291,34037,34038],{"class":3431},"       
alerts",[1291,34040,694],{"class":3435},[1291,34042,6564],{"class":3812},[1291,34044,3816],{"class":3435},[1291,34046,3691],{"class":3435},[1291,34048,33378],{"class":3439},[1291,34050,3691],{"class":3435},[1291,34052,3827],{"class":3435},[1291,34054,34055,34057,34059,34061,34063],{"class":3427,"line":6940},[1291,34056,34025],{"class":3475},[1291,34058,33389],{"class":3431},[1291,34060,33392],{"class":3435},[1291,34062,33395],{"class":3451},[1291,34064,5243],{"class":3435},[1291,34066,34067,34069,34071,34073,34075,34077,34079,34081],{"class":3427,"line":6952},[1291,34068,34038],{"class":3431},[1291,34070,694],{"class":3435},[1291,34072,6564],{"class":3812},[1291,34074,3816],{"class":3435},[1291,34076,3691],{"class":3435},[1291,34078,33412],{"class":3439},[1291,34080,3691],{"class":3435},[1291,34082,3827],{"class":3435},[1291,34084,34085,34087,34089,34091,34093],{"class":3427,"line":6984},[1291,34086,34025],{"class":3475},[1291,34088,33423],{"class":3431},[1291,34090,33392],{"class":3435},[1291,34092,33428],{"class":3451},[1291,34094,5243],{"class":3435},[1291,34096,34097,34099,34101,34103,34105,34107,34109,34111],{"class":3427,"line":7996},[1291,34098,34038],{"class":3431},[1291,34100,694],{"class":3435},[1291,34102,6564],{"class":3812},[1291,34104,3816],{"class":3435},[1291,34106,3691],{"class":3435},[1291,34108,33445],{"class":3439},[1291,34110,3691],{"class":3435},[1291,34112,3827],{"class":3435},[1291,34114,34115,34118],{"class":3427,"line":8007},[1291,34116,34117],{"class":3475},"   return",[1291,34119,33456],{"class":3431},[1291,34121,34122],{"class":3427,"line":8018},[1291,34123,3526],{"emptyLinePlaceholder":35},[1291,34125,34126],{"class":3427,"line":8029},[1291,34127,34128],{"class":3673},"# Apply the UDF and generate multiple 
alerts\n",[1291,34130,34131,34133,34135,34137,34139,34141],{"class":3427,"line":8040},[1291,34132,33488],{"class":3431},[1291,34134,3738],{"class":3435},[1291,34136,33493],{"class":3431},[1291,34138,694],{"class":3435},[1291,34140,16571],{"class":3812},[1291,34142,3874],{"class":3435},[1291,34144,34145,34147,34149,34151,34153,34155,34157,34159],{"class":3427,"line":8051},[1291,34146,33825],{"class":3819},[1291,34148,3738],{"class":3435},[1291,34150,3841],{"class":3812},[1291,34152,694],{"class":3435},[1291,34154,16845],{"class":3457},[1291,34156,694],{"class":3435},[1291,34158,32313],{"class":3457},[1291,34160,4107],{"class":3435},[1291,34162,34163,34165,34167,34169,34171,34173,34175,34177],{"class":3427,"line":8057},[1291,34164,33834],{"class":3819},[1291,34166,3738],{"class":3435},[1291,34168,3841],{"class":3812},[1291,34170,694],{"class":3435},[1291,34172,16845],{"class":3457},[1291,34174,694],{"class":3435},[1291,34176,32339],{"class":3457},[1291,34178,4107],{"class":3435},[1291,34180,34181,34184,34186,34188],{"class":3427,"line":8068},[1291,34182,34183],{"class":3819},"   alert_type",[1291,34185,3738],{"class":3435},[1291,34187,33469],{"class":3812},[1291,34189,3874],{"class":3435},[1291,34191,34192,34195,34197,34199,34201,34203],{"class":3427,"line":8079},[1291,34193,34194],{"class":3812},"       
pw",[1291,34196,694],{"class":3435},[1291,34198,16845],{"class":3457},[1291,34200,694],{"class":3435},[1291,34202,32427],{"class":3457},[1291,34204,4107],{"class":3435},[1291,34206,34207,34209,34211,34213,34215,34217],{"class":3427,"line":8090},[1291,34208,34194],{"class":3812},[1291,34210,694],{"class":3435},[1291,34212,16845],{"class":3457},[1291,34214,694],{"class":3435},[1291,34216,32460],{"class":3457},[1291,34218,4107],{"class":3435},[1291,34220,34221,34223,34225,34227,34229],{"class":3427,"line":8101},[1291,34222,34194],{"class":3812},[1291,34224,694],{"class":3435},[1291,34226,16845],{"class":3457},[1291,34228,694],{"class":3435},[1291,34230,33588],{"class":3457},[1291,34232,34233],{"class":3427,"line":8112},[1291,34234,34235],{"class":3435},"   )\n",[1291,34237,34238],{"class":3427,"line":8117},[1291,34239,3827],{"class":3435},[1291,34241,34242],{"class":3427,"line":8128},[1291,34243,3526],{"emptyLinePlaceholder":35},[1291,34245,34246],{"class":3427,"line":8139},[1291,34247,34248],{"class":3673},"# Filter rows with no 
alerts\n",[1291,34250,34251,34253,34255,34257,34259,34261,34263,34265,34267,34269,34271,34273,34275,34277,34279,34281,34283,34285,34287,34289,34291,34293],{"class":3427,"line":8150},[1291,34252,33488],{"class":3431},[1291,34254,3738],{"class":3435},[1291,34256,33631],{"class":3431},[1291,34258,694],{"class":3435},[1291,34260,33636],{"class":3812},[1291,34262,3816],{"class":3435},[1291,34264,3841],{"class":3812},[1291,34266,694],{"class":3435},[1291,34268,16845],{"class":3457},[1291,34270,694],{"class":3435},[1291,34272,33609],{"class":3457},[1291,34274,19244],{"class":3435},[1291,34276,18155],{"class":3812},[1291,34278,3816],{"class":3435},[1291,34280,3841],{"class":3812},[1291,34282,694],{"class":3435},[1291,34284,16845],{"class":3457},[1291,34286,694],{"class":3435},[1291,34288,33609],{"class":3457},[1291,34290,694],{"class":3435},[1291,34292,33669],{"class":3812},[1291,34294,6237],{"class":3435},[1291,34296,34297],{"class":3427,"line":8156},[1291,34298,3526],{"emptyLinePlaceholder":35},[1291,34300,34301],{"class":3427,"line":8162},[1291,34302,34303],{"class":3673},"# Output alerts to another NATS subject\n",[1291,34305,34306,34308,34310,34312,34314,34316,34318,34320],{"class":3427,"line":8168},[1291,34307,3841],{"class":3431},[1291,34309,694],{"class":3435},[1291,34311,4078],{"class":3457},[1291,34313,694],{"class":3435},[1291,34315,29835],{"class":3457},[1291,34317,694],{"class":3435},[1291,34319,9700],{"class":3812},[1291,34321,3874],{"class":3435},[1291,34323,34324,34327,34329,34331],{"class":3427,"line":8174},[1291,34325,34326],{"class":3812},"   alerts",[1291,34328,694],{"class":3435},[1291,34330,16571],{"class":3812},[1291,34332,3874],{"class":3435},[1291,34334,34335,34338,34340,34342,34344,34346,34348,34350],{"class":3427,"line":8180},[1291,34336,34337],{"class":3819},"       
vehicle_id",[1291,34339,3738],{"class":3435},[1291,34341,3841],{"class":3812},[1291,34343,694],{"class":3435},[1291,34345,16845],{"class":3457},[1291,34347,694],{"class":3435},[1291,34349,32313],{"class":3457},[1291,34351,4107],{"class":3435},[1291,34353,34354,34357,34359,34361,34363,34365,34367,34369],{"class":3427,"line":8186},[1291,34355,34356],{"class":3819},"       timestamp",[1291,34358,3738],{"class":3435},[1291,34360,3841],{"class":3812},[1291,34362,694],{"class":3435},[1291,34364,16845],{"class":3457},[1291,34366,694],{"class":3435},[1291,34368,32339],{"class":3457},[1291,34370,4107],{"class":3435},[1291,34372,34373,34376,34378,34380,34382,34384,34386],{"class":3427,"line":8191},[1291,34374,34375],{"class":3819},"       alert_type",[1291,34377,3738],{"class":3435},[1291,34379,3841],{"class":3812},[1291,34381,694],{"class":3435},[1291,34383,16845],{"class":3457},[1291,34385,694],{"class":3435},[1291,34387,34388],{"class":3457},"alert_type\n",[1291,34390,34391],{"class":3427,"line":8197},[1291,34392,34393],{"class":3435},"   
),\n",[1291,34395,34396,34398,34400,34402,34404,34406],{"class":3427,"line":8203},[1291,34397,33919],{"class":3819},[1291,34399,3738],{"class":3435},[1291,34401,3691],{"class":3435},[1291,34403,31416],{"class":3439},[1291,34405,3691],{"class":3435},[1291,34407,4107],{"class":3435},[1291,34409,34410,34412,34414,34416,34418,34420],{"class":3427,"line":8209},[1291,34411,33934],{"class":3819},[1291,34413,3738],{"class":3435},[1291,34415,3691],{"class":3435},[1291,34417,32053],{"class":3439},[1291,34419,3691],{"class":3435},[1291,34421,4107],{"class":3435},[1291,34423,34424,34426,34428,34430,34432],{"class":3427,"line":8214},[1291,34425,33949],{"class":3819},[1291,34427,3738],{"class":3435},[1291,34429,3691],{"class":3435},[1291,34431,8623],{"class":3439},[1291,34433,3746],{"class":3435},[1291,34435,34436],{"class":3427,"line":8220},[1291,34437,3827],{"class":3435},[1291,34439,34440],{"class":3427,"line":8226},[1291,34441,3526],{"emptyLinePlaceholder":35},[1291,34443,34444],{"class":3427,"line":8231},[1291,34445,34446],{"class":3673},"# Run the Pathway pipeline\n",[1291,34448,34449,34451,34453,34455],{"class":3427,"line":8237},[1291,34450,3841],{"class":3431},[1291,34452,694],{"class":3435},[1291,34454,11274],{"class":3812},[1291,34456,4871],{"class":3435},[140,34458,34460],{"id":34459},"subscribing-to-alerts","Subscribing to Alerts",[73,34462,34463,34464,34466],{},"In a real-world application, other services or systems would subscribe to the ",[3061,34465,32053],{}," subject to receive and act upon real-time alerts generated by the Pathway anomaly detection script. These services might include dashboards, notification systems, or automated workflows that handle critical events. Depending on the severity of an alert, the system could escalate it appropriately—for example, by sending an SMS or making a phone call for high-priority issues, while less critical alerts might be sent via email or logged for later review. 
Alerts could also be duplicated across popular messaging platforms to ensure they reach the relevant stakeholders promptly.",[73,34468,34469,34470,34472],{},"For the sake of this tutorial, to demonstrate that the data pipeline works correctly, it is sufficient to have a simple subscriber script that listens to the ",[3061,34471,32053],{}," subject and prints the received alerts to the console.",[3189,34474,34476],{"id":34475},"complete-code-for-the-subscriber-alerts_subscriberpy","Complete code for the subscriber (alerts_subscriber.py):",[3418,34478,34481],{"className":3420,"code":34479,"filename":34480,"language":3422,"meta":23,"style":23},"import asyncio\nimport nats\nimport json\n\nasync def receive_alerts():\n    nc = await nats.connect(\"nats:\u002F\u002Flocalhost:4222\")\n\n    async def alert_handler(msg):\n        alert = json.loads(msg.data.decode())\n        print(f\"ALERT: Vehicle {alert['vehicle_id']} - {alert['alert_type']} at {alert['timestamp']}\")\n\n    await nc.subscribe(\"fleet.alerts\", cb=alert_handler)\n    print(\"Subscribed to 'fleet.alerts' subject.\")\n\n    while True:\n        await asyncio.sleep(1)\n\nasyncio.run(receive_alerts())\n","alerts_subscriber.py",[3061,34482,34483,34489,34495,34501,34505,34516,34540,34544,34559,34587,34655,34659,34688,34703,34707,34713,34729,34733],{"__ignoreMap":23},[1291,34484,34485,34487],{"class":3427,"line":3428},[1291,34486,3476],{"class":3475},[1291,34488,29848],{"class":3431},[1291,34490,34491,34493],{"class":3427,"line":24},[1291,34492,3476],{"class":3475},[1291,34494,29855],{"class":3431},[1291,34496,34497,34499],{"class":3427,"line":675},[1291,34498,3476],{"class":3475},[1291,34500,28159],{"class":3431},[1291,34502,34503],{"class":3427,"line":3542},[1291,34504,3526],{"emptyLinePlaceholder":35},[1291,34506,34507,34509,34511,34514],{"class":3427,"line":3547},[1291,34508,9369],{"class":7739},[1291,34510,9372],{"class":7739},[1291,34512,34513],{"class":3812}," 
receive_alerts",[1291,34515,27496],{"class":3435},[1291,34517,34518,34520,34522,34524,34526,34528,34530,34532,34534,34536,34538],{"class":3427,"line":3572},[1291,34519,30125],{"class":3431},[1291,34521,3738],{"class":3435},[1291,34523,9511],{"class":3475},[1291,34525,29916],{"class":3431},[1291,34527,694],{"class":3435},[1291,34529,29921],{"class":3812},[1291,34531,3816],{"class":3435},[1291,34533,3691],{"class":3435},[1291,34535,29928],{"class":3439},[1291,34537,3691],{"class":3435},[1291,34539,3827],{"class":3435},[1291,34541,34542],{"class":3427,"line":3614},[1291,34543,3526],{"emptyLinePlaceholder":35},[1291,34545,34546,34548,34550,34553,34555,34557],{"class":3427,"line":3640},[1291,34547,30818],{"class":7739},[1291,34549,9372],{"class":7739},[1291,34551,34552],{"class":3812}," alert_handler",[1291,34554,3816],{"class":3435},[1291,34556,30372],{"class":3819},[1291,34558,11948],{"class":3435},[1291,34560,34561,34564,34566,34568,34570,34573,34575,34577,34579,34581,34583,34585],{"class":3427,"line":3665},[1291,34562,34563],{"class":3431},"        alert ",[1291,34565,3738],{"class":3435},[1291,34567,32557],{"class":3431},[1291,34569,694],{"class":3435},[1291,34571,34572],{"class":3812},"loads",[1291,34574,3816],{"class":3435},[1291,34576,30372],{"class":3812},[1291,34578,694],{"class":3435},[1291,34580,3935],{"class":3457},[1291,34582,694],{"class":3435},[1291,34584,30411],{"class":3812},[1291,34586,6237],{"class":3435},[1291,34588,34589,34591,34593,34595,34598,34600,34602,34604,34606,34608,34610,34612,34614,34616,34618,34620,34622,34624,34626,34628,34630,34632,34635,34637,34639,34641,34643,34645,34647,34649,34651,34653],{"class":3427,"line":3670},[1291,34590,30833],{"class":3812},[1291,34592,3816],{"class":3435},[1291,34594,9643],{"class":7739},[1291,34596,34597],{"class":3439},"\"ALERT: Vehicle 
",[1291,34599,8770],{"class":3451},[1291,34601,30037],{"class":3812},[1291,34603,3688],{"class":3435},[1291,34605,3436],{"class":3435},[1291,34607,32313],{"class":3439},[1291,34609,3436],{"class":3435},[1291,34611,3699],{"class":3435},[1291,34613,9671],{"class":3451},[1291,34615,2578],{"class":3439},[1291,34617,8770],{"class":3451},[1291,34619,30037],{"class":3812},[1291,34621,3688],{"class":3435},[1291,34623,3436],{"class":3435},[1291,34625,33609],{"class":3439},[1291,34627,3436],{"class":3435},[1291,34629,3699],{"class":3435},[1291,34631,9671],{"class":3451},[1291,34633,34634],{"class":3439}," at ",[1291,34636,8770],{"class":3451},[1291,34638,30037],{"class":3812},[1291,34640,3688],{"class":3435},[1291,34642,3436],{"class":3435},[1291,34644,32339],{"class":3439},[1291,34646,3436],{"class":3435},[1291,34648,3699],{"class":3435},[1291,34650,9671],{"class":3451},[1291,34652,3691],{"class":3439},[1291,34654,3827],{"class":3435},[1291,34656,34657],{"class":3427,"line":3677},[1291,34658,3526],{"emptyLinePlaceholder":35},[1291,34660,34661,34663,34665,34667,34669,34671,34673,34675,34677,34679,34681,34683,34686],{"class":3427,"line":3877},[1291,34662,30160],{"class":3475},[1291,34664,29968],{"class":3431},[1291,34666,694],{"class":3435},[1291,34668,30453],{"class":3812},[1291,34670,3816],{"class":3435},[1291,34672,3691],{"class":3435},[1291,34674,32053],{"class":3439},[1291,34676,3691],{"class":3435},[1291,34678,3566],{"class":3435},[1291,34680,30462],{"class":3819},[1291,34682,3738],{"class":3435},[1291,34684,34685],{"class":3812},"alert_handler",[1291,34687,3827],{"class":3435},[1291,34689,34690,34692,34694,34696,34699,34701],{"class":3427,"line":3916},[1291,34691,27099],{"class":3812},[1291,34693,3816],{"class":3435},[1291,34695,3691],{"class":3435},[1291,34697,34698],{"class":3439},"Subscribed to 'fleet.alerts' 
subject.",[1291,34700,3691],{"class":3435},[1291,34702,3827],{"class":3435},[1291,34704,34705],{"class":3427,"line":4519},[1291,34706,3526],{"emptyLinePlaceholder":35},[1291,34708,34709,34711],{"class":3427,"line":6038},[1291,34710,30938],{"class":3475},[1291,34712,30941],{"class":3435},[1291,34714,34715,34717,34719,34721,34723,34725,34727],{"class":3427,"line":6043},[1291,34716,30946],{"class":3475},[1291,34718,30529],{"class":3431},[1291,34720,694],{"class":3435},[1291,34722,30534],{"class":3812},[1291,34724,3816],{"class":3435},[1291,34726,24626],{"class":3451},[1291,34728,3827],{"class":3435},[1291,34730,34731],{"class":3427,"line":6066},[1291,34732,3526],{"emptyLinePlaceholder":35},[1291,34734,34735,34737,34739,34741,34743,34746],{"class":3427,"line":6078},[1291,34736,29831],{"class":3431},[1291,34738,694],{"class":3435},[1291,34740,11274],{"class":3812},[1291,34742,3816],{"class":3435},[1291,34744,34745],{"class":3812},"receive_alerts",[1291,34747,6237],{"class":3435},[3189,34749,34751],{"id":34750},"testing-your-setup","Testing your setup",[73,34753,34754],{},"It’s finally time to test your setup. 
As mentioned previously, you need to run the subscriber script before running the publisher scripts.",[73,34756,34757],{},"You can start running the scripts above by following these steps:",[665,34759,34760],{},[148,34761,34762,34763,34765],{},"Run your subscriber script alerts_subscriber.py that subscribes to the ",[3061,34764,32053],{}," subject:",[3418,34767,34769],{"className":6347,"code":34768,"language":6349,"meta":23,"style":23},"python alerts_subscriber.py\n",[3061,34770,34771],{"__ignoreMap":23},[1291,34772,34773,34775],{"class":3427,"line":3428},[1291,34774,3422],{"class":6356},[1291,34776,34777],{"class":3439}," alerts_subscriber.py\n",[665,34779,34780],{"start":24},[148,34781,34782,34783,34786,34787,34765],{},"Run your Pathway anomaly detection script that reads from the ",[3061,34784,34785],{},"fleet.telemetry"," subject and writes to the ",[3061,34788,34789],{},"fleet.alerts",[3418,34791,34792],{"className":6347,"code":34768,"language":6349,"meta":23,"style":23},[3061,34793,34794],{"__ignoreMap":23},[1291,34795,34796,34798],{"class":3427,"line":3428},[1291,34797,3422],{"class":6356},[1291,34799,34777],{"class":3439},[665,34801,34802],{"start":675},[148,34803,34804,34805,34765],{},"Now run your publisher script that simulates telemetry data and pushes it to the ",[3061,34806,34785],{},[3418,34808,34809],{"className":6347,"code":34768,"language":6349,"meta":23,"style":23},[3061,34810,34811],{"__ignoreMap":23},[1291,34812,34813,34815],{"class":3427,"line":3428},[1291,34814,3422],{"class":6356},[1291,34816,34777],{"class":3439},[73,34818,34819],{},"In the output terminal for the alerts_subscriber.py in step 1 above, you should now be able to see alerts similar to the ones below.",[3418,34821,34823],{"className":6347,"code":34822,"language":6349,"meta":23,"style":23},"ALERT: Vehicle TRUCK-3 - High Engine Temp at 2024-11-25T12:43:33.809829\nALERT: Vehicle TRUCK-1 - High Engine Temp at 2024-11-25T12:43:34.811335\nALERT: Vehicle TRUCK-2 - High Engine Temp at 
2024-11-25T12:43:35.812677\n",[3061,34824,34825,34853,34875],{"__ignoreMap":23},[1291,34826,34827,34830,34833,34836,34838,34841,34844,34847,34850],{"class":3427,"line":3428},[1291,34828,34829],{"class":6356},"ALERT:",[1291,34831,34832],{"class":3439}," Vehicle",[1291,34834,34835],{"class":3439}," TRUCK-3",[1291,34837,5838],{"class":3439},[1291,34839,34840],{"class":3439}," High",[1291,34842,34843],{"class":3439}," Engine",[1291,34845,34846],{"class":3439}," Temp",[1291,34848,34849],{"class":3439}," at",[1291,34851,34852],{"class":3439}," 2024-11-25T12:43:33.809829\n",[1291,34854,34855,34857,34859,34862,34864,34866,34868,34870,34872],{"class":3427,"line":24},[1291,34856,34829],{"class":6356},[1291,34858,34832],{"class":3439},[1291,34860,34861],{"class":3439}," TRUCK-1",[1291,34863,5838],{"class":3439},[1291,34865,34840],{"class":3439},[1291,34867,34843],{"class":3439},[1291,34869,34846],{"class":3439},[1291,34871,34849],{"class":3439},[1291,34873,34874],{"class":3439}," 2024-11-25T12:43:34.811335\n",[1291,34876,34877,34879,34881,34884,34886,34888,34890,34892,34894],{"class":3427,"line":675},[1291,34878,34829],{"class":6356},[1291,34880,34832],{"class":3439},[1291,34882,34883],{"class":3439}," TRUCK-2",[1291,34885,5838],{"class":3439},[1291,34887,34840],{"class":3439},[1291,34889,34843],{"class":3439},[1291,34891,34846],{"class":3439},[1291,34893,34849],{"class":3439},[1291,34895,34896],{"class":3439}," 2024-11-25T12:43:35.812677\n",[140,34898,8794],{"id":8793},[73,34900,34901],{},"In this tutorial, you have learned how to build a real-time data processing system by integrating NATS with Pathway Live Data Framework as powerful alternatives to Kafka and Flink. Starting by setting up basic publishers and subscribers using NATS and Python, then connecting Pathway to NATS to process messages in real time. 
The fleet monitoring use case has shown how to ingest telemetry data, identify critical conditions, and generate alerts.",[73,34903,34904],{},"Below is a recap of the key points of the technologies discussed in this article.",[3189,34906,34908],{"id":34907},"benefits-of-using-pathway-live-data-framework-the-flink-alternative-here-and-its-nats-connector","Benefits of Using Pathway Live Data Framework (the Flink alternative here) and its NATS Connector:",[145,34910,34911,34917,34923,34928,34934],{},[148,34912,34913,34916],{},[169,34914,34915],{},"Seamless Integration",": The framework’s native NATS connectors allow you to directly ingest data from NATS subjects into Pathway Live Data Framework without writing custom integration code. This streamlines development and lets you focus on processing logic rather than data plumbing.",[148,34918,34919,34922],{},[169,34920,34921],{},"High Performance and Low Latency",": Both NATS and Pathway Live Data Framework are designed for speed. NATS handles rapid message delivery, while the framework processes streams in real time. This ensures that data is analyzed and alerts are generated almost instantly.",[148,34924,34925,34927],{},[169,34926,22781],{},": NATS supports clustering, and Pathway Live Data Framework can distribute processing across multiple nodes. This means your system can handle increased data volumes as your application grows.",[148,34929,34930,34933],{},[169,34931,34932],{},"Flexibility in Data Formats",": The framework’s NATS connectors handle various data formats, including JSON, plaintext, and raw bytes. This allows you to work with the data format that best suits your application.",[148,34935,34936,34939],{},[169,34937,34938],{},"Reliability and Fault Tolerance",": NATS offers features like message acknowledgment and clustering for high availability. 
The Pathway Live Data Framework can recover state after failures, ensuring continuity in processing.",[3189,34941,34943],{"id":34942},"advantages-of-nats-the-kafka-alternative","Advantages of NATS – the Kafka alternative:",[145,34945,34946,34952,34958],{},[148,34947,34948,34951],{},[169,34949,34950],{},"Lightweight and Efficient",": NATS is a lightweight messaging system that provides fast and reliable communication between distributed systems.",[148,34953,34954,34957],{},[169,34955,34956],{},"Simple Publish\u002FSubscribe Model",": Its straightforward pub\u002Fsub model makes it easy to implement asynchronous communication.",[148,34959,34960,34963],{},[169,34961,34962],{},"Flexible Topologies",": NATS supports various communication patterns and can be deployed in diverse architectures.",[73,34965,34966],{},"This way, by leveraging the strengths of both NATS and Pathway, you can build scalable, efficient, and reliable real-time data processing systems.",[140,34968,34970],{"id":34969},"additional-resources","Additional Resources",[145,34972,34973,34983,34992,35002,35011,35020],{},[148,34974,34975,27115,34978],{},[169,34976,34977],{},"NATS Documentation",[77,34979,34982],{"href":34980,"rel":34981},"https:\u002F\u002Fdocs.nats.io\u002F",[81],"docs.nats.io",[148,34984,34985,27115,34988],{},[169,34986,34987],{},"NATS Python Client",[77,34989,34991],{"href":29792,"rel":34990},[81],"nats.py GitHub",[148,34993,34994,27115,34997],{},[169,34995,34996],{},"Pathway Documentation",[77,34998,35001],{"href":34999,"rel":35000},"https:\u002F\u002Fpathway.com\u002Fdevelopers\u002Fuser-guide\u002Fintroduction\u002Fwelcome",[81],"pathway.com\u002Fdocs",[148,35003,35004,27115,35007],{},[169,35005,35006],{},"Pathway GitHub Repository",[77,35008,35010],{"href":29429,"rel":35009},[81],"pathwaycom\u002Fpathway",[148,35012,35013,27115,35016],{},[169,35014,35015],{},"Synadia Multi-cloud NATS.io Platform 
Docs",[77,35017,35018],{"href":35018,"rel":35019},"https:\u002F\u002Fdocs.synadia.com\u002Fcloud",[81],[148,35021,35022,27115,35025],{},[169,35023,35024],{},"AsyncIO in Python",[77,35026,35029],{"href":35027,"rel":35028},"https:\u002F\u002Fdocs.python.org\u002F3\u002Flibrary\u002Fasyncio.html",[81],"Python AsyncIO Documentation",[5019,35031,35032],{},"html pre.shiki code .s5Dmg, html code.shiki .s5Dmg{--shiki-default:#FFCB6B}html pre.shiki code .sfyAc, html code.shiki .sfyAc{--shiki-default:#C3E88D}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html pre.shiki code .s6cf3, html code.shiki .s6cf3{--shiki-default:#89DDFF;--shiki-default-font-style:italic}html pre.shiki code .s0W1g, html code.shiki .s0W1g{--shiki-default:#BABED8}html pre.shiki code .sJ14y, html code.shiki .sJ14y{--shiki-default:#C792EA}html pre.shiki code .sdLwU, html code.shiki .sdLwU{--shiki-default:#82AAFF}html pre.shiki code .sAklC, html code.shiki .sAklC{--shiki-default:#89DDFF}html pre.shiki code .saEQR, html code.shiki .saEQR{--shiki-default:#676E95;--shiki-default-font-style:italic}html pre.shiki code .s7ZW3, html code.shiki .s7ZW3{--shiki-default:#BABED8;--shiki-default-font-style:italic}html pre.shiki code .sx098, html code.shiki .sx098{--shiki-default:#F78C6C}html pre.shiki code .s-wAU, html code.shiki 
.s-wAU{--shiki-default:#F07178}",{"title":23,"searchDepth":24,"depth":24,"links":35034},[35035,35039,35040,35044,35051,35052,35058,35064,35067,35071,35075],{"id":29610,"depth":24,"text":29611,"children":35036},[35037,35038],{"id":29617,"depth":675,"text":29618},{"id":29636,"depth":675,"text":29637},{"id":29656,"depth":24,"text":29657},{"id":29742,"depth":24,"text":29743,"children":35041},[35042,35043],{"id":29746,"depth":675,"text":29747},{"id":29785,"depth":675,"text":29786},{"id":29814,"depth":24,"text":29815,"children":35045},[35046,35048,35049],{"id":30079,"depth":675,"text":35047},"Complete Publisher Code (publisher.py):",{"id":30247,"depth":675,"text":30248},{"id":30731,"depth":675,"text":35050},"Complete Subscriber Code (subscriber.py):",{"id":31111,"depth":24,"text":31112},{"id":31224,"depth":24,"text":31225,"children":35053},[35054,35055,35056,35057],{"id":31228,"depth":675,"text":31229},{"id":31251,"depth":675,"text":31252},{"id":31343,"depth":675,"text":31344},{"id":31645,"depth":675,"text":31646},{"id":31987,"depth":24,"text":31988,"children":35059},[35060,35061,35062,35063],{"id":31991,"depth":675,"text":31992},{"id":32004,"depth":675,"text":32005},{"id":32098,"depth":675,"text":32099},{"id":32634,"depth":675,"text":32635},{"id":33052,"depth":24,"text":33053,"children":35065},[35066],{"id":33778,"depth":675,"text":33779},{"id":34459,"depth":24,"text":34460,"children":35068},[35069,35070],{"id":34475,"depth":675,"text":34476},{"id":34750,"depth":675,"text":34751},{"id":8793,"depth":24,"text":8794,"children":35072},[35073,35074],{"id":34907,"depth":675,"text":34908},{"id":34942,"depth":675,"text":34943},{"id":34969,"depth":24,"text":34970},"NATS, a high-performance messaging system, and Pathway Live Data Framework, a powerful batch and stream processing framework. 
Building such systems can be complex, but powerful tools like NATS and Pathway Live Data Framework are making it easier than ever",{"layout":90,"date":35078,"thumbnail":35079,"tags":35081,"coauthors":35082,"hidden":35},"2024-12-11",{"src":35080,"provider":11},"\u002Fassets\u002Fblog\u002Fthumbnails\u002Fpathway-nats-th.png",[17824],[35083],{"name":17837,"description":23034,"img":17839,"linkedin":17840},"\u002Fframework\u002Fblog\u002Fbuild-real-time-systems-nats-pathway-alternative-kafka-flink",{"title":29572,"description":35076},{"loc":35084},"framework\u002Fblog\u002F883.build-real-time-systems-nats-pathway-alternative-kafka-flink","fIq3Nkh4o1nnHpWFTKPVqYMy6TBMrIgmYo-NcSCh3-Y",{"id":35090,"title":35091,"author":35092,"body":35099,"description":23,"extension":27,"meta":35110,"navigation":35,"path":35114,"seo":35115,"sitemap":35116,"stem":35117,"__hash__":35118},"content\u002Fframework\u002Fblog\u002F885.option-greeks.md","Computing the Option Greeks using Pathway and Databento",{"id":35093,"url":35094,"name":35095,"description":35096,"img":35097,"provider":11,"linkedin":35098},"luca","luca-metehau","Luca Metehau","Engineer","\u002Fassets\u002Fauthors\u002Fluca-metehau.jpg","https:\u002F\u002Fwww.linkedin.com\u002Fin\u002Fluca-mihnea-metehau-76113126b\u002F",{"type":13,"value":35100,"toc":35108},[35101,35105],[68,35102,35104],{"id":35103},"taking-you-to-an-another-page","Taking you to another 
page...",[84,35106],{"url":35107},"\u002Fdevelopers\u002Ftemplates\u002Fetl\u002Foption-greeks",{"title":23,"searchDepth":24,"depth":24,"links":35109},[],{"layout":90,"redirect":35107,"date":5045,"thumbnail":35111,"tags":35113,"hidden":35},{"src":35112},"\u002Fassets\u002Fcontent\u002Fshowcases\u002Foption-greeks\u002Foption-greeks.svg",[17824,6268],"\u002Fframework\u002Fblog\u002Foption-greeks",{"title":35091,"description":23},{"loc":35114},"framework\u002Fblog\u002F885.option-greeks","mnyiCdul_3QlRlPk6cstRm7yJGt5dK8_cIub4EDNmaU",[35120,35121],{"title":2001,"path":2016,"stem":2019,"children":-1},{"title":2037,"path":2052,"stem":2055,"children":-1},1781255060714]