Update README.md (#209)

rohit-ganguly · web-flow · commit cccbd1dbe0c0 · 2025-05-08T17:58:28.000-07:00
diff --git a/README.md b/README.md
@@ -1,4 +1,24 @@
-# RAG on PostgreSQL
+# RAG on PostgreSQL (PyCon US 2025 Demo!)
+
+> This repository is used in the Microsoft sponsor session at PyCon US 2025 by Rohit Ganguly and Pamela Fox.
+> If you're interested in the original project, visit [Pamela's repository](https://github.com/Azure-Samples/rag-postgres-openai-python), which this one is forked from!
+
+## Resource links used in the presentation:
+
+### VS Code Extensions
+- [Python](https://marketplace.visualstudio.com/items?itemName=ms-python.python)
+- [GitHub Copilot](https://marketplace.visualstudio.com/items?itemName=GitHub.copilot)
+- [GitHub Copilot for Azure](https://marketplace.visualstudio.com/items?itemName=ms-azuretools.vscode-azure-github-copilot)
+- [Dev Containers](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers)
+- [The (new!) PostgreSQL Extension (link coming soon!)](#)
+- [AI Toolkit](https://marketplace.visualstudio.com/items?itemName=ms-windows-ai-studio.windows-ai-studio)
+
+### Azure Services & Tools
+- [Azure Developer CLI](https://aka.ms/azd)
+- [Azure Container Apps](https://aka.ms/acadocs)
+- [Azure Database for PostgreSQL](https://aka.ms/postgresdocs)
+- [Azure AI Foundry](https://aka.ms/aifoundrydocs)
+
 
 [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/Azure-Samples/rag-postgres-openai-python)
 [![Open in Dev Containers](https://img.shields.io/static/v1?style=for-the-badge&label=Dev%20Containers&message=Open&color=blue&logo=visualstudiocode)](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/azure-samples/rag-postgres-openai-python)
diff --git a/convert_csv_json.py b/convert_csv_json.py
@@ -0,0 +1,51 @@
+import ast
+import csv
+import json
+
+# Read CSV file - Using the correct dialect to handle quotes properly
+with open("data.csv", encoding="utf-8") as csv_file:
+    # Use the csv.reader with proper quoting parameters
+    csv_reader = csv.reader(csv_file, quoting=csv.QUOTE_ALL, doublequote=True, escapechar="\\")
+    header = next(csv_reader)  # Get the header row
+    data = list(csv_reader)  # Get all data rows
+
+# Convert to JSON format
+json_data = []
+for row in data:
+    item = {}
+    for i in range(len(header)):
+        if i < len(row):  # Ensure we don't go out of bounds
+            value = row[i].strip()
+            # Check if the value looks like a JSON array
+            if value.startswith("[") and value.endswith("]"):
+                try:
+                    # Parse the JSON-like string into a Python object
+                    value = json.loads(value.replace("'", '"'))
+                except (ValueError, SyntaxError):
+                    try:
+                        # Try with ast as a fallback
+                        value = ast.literal_eval(value)
+                    except (ValueError, SyntaxError):
+                        # If parsing fails, keep it as a string
+                        pass
+            # Convert boolean strings
+            elif value.lower() == "true":
+                value = True
+            elif value.lower() == "false":
+                value = False
+            # Try to convert numbers
+            elif value.isdigit():
+                value = int(value)
+            elif value.replace(".", "", 1).isdigit() and value.count(".") <= 1:
+                value = float(value)
+
+            item[header[i]] = value
+    # remove is_open column
+    del item["is_open"]
+    json_data.append(item)
+
+# Write to JSON file
+with open("data.json", "w", encoding="utf-8") as f:
+    json.dump(json_data, f, indent=4, ensure_ascii=False)
+
+print(f"Successfully converted CSV data to JSON format with {len(json_data)} records")
diff --git a/src/backend/fastapi_app/api_models.py b/src/backend/fastapi_app/api_models.py
@@ -44,14 +44,21 @@ class ChatRequest(BaseModel):
 
 class ItemPublic(BaseModel):
     id: int
-    type: str
-    brand: str
     name: str
+    location: str
+    cuisine: str
+    rating: int
+    price_level: int
+    review_count: int
+    hours: str
+    tags: list[str]
     description: str
-    price: float
+    menu_summary: str
+    top_reviews: str
+    vibe: str
 
     def to_str_for_rag(self):
-        return f"Name:{self.name} Description:{self.description} Price:{self.price} Brand:{self.brand} Type:{self.type}"
+        return f"Name:{self.name} Description:{self.description} Location:{self.location} Cuisine:{self.cuisine} Rating:{self.rating} Price Level:{self.price_level} Review Count:{self.review_count} Hours:{self.hours} Tags:{self.tags} Menu Summary:{self.menu_summary} Top Reviews:{self.top_reviews} Vibe:{self.vibe}"  # noqa: E501
 
 
 class ItemWithDistance(ItemPublic):
@@ -105,16 +112,18 @@ class Filter(BaseModel):
     value: Any
 
 
-class PriceFilter(Filter):
-    column: str = Field(default="price", description="The column to filter on (always 'price' for this filter)")
-    comparison_operator: str = Field(description="The operator for price comparison ('>', '<', '>=', '<=', '=')")
-    value: float = Field(description="The price value to compare against (e.g., 30.00)")
+class PriceLevelFilter(Filter):
+    column: str = Field(
+        default="price_level", description="The column to filter on (always 'price_level' for this filter)"
+    )
+    comparison_operator: str = Field(description="The operator for price level comparison ('>', '<', '>=', '<=', '=')")
+    value: float = Field(description="Value to compare against, either 1, 2, 3, 4")
 
 
-class BrandFilter(Filter):
-    column: str = Field(default="brand", description="The column to filter on (always 'brand' for this filter)")
-    comparison_operator: str = Field(description="The operator for brand comparison ('=' or '!=')")
-    value: str = Field(description="The brand name to compare against (e.g., 'AirStrider')")
+class RatingFilter(Filter):
+    column: str = Field(default="rating", description="The column to filter on (always 'rating' for this filter)")
+    comparison_operator: str = Field(description="The operator for rating comparison ('>', '<', '>=', '<=', '=')")
+    value: str = Field(description="Value to compare against, either 0 1 2 3 4")
 
 
 class SearchResults(BaseModel):
diff --git a/src/backend/fastapi_app/postgres_models.py b/src/backend/fastapi_app/postgres_models.py
@@ -1,7 +1,8 @@
 from __future__ import annotations
 
 from pgvector.sqlalchemy import Vector
-from sqlalchemy import Index
+from sqlalchemy import VARCHAR, Index
+from sqlalchemy.dialects import postgresql
 from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
 
 
@@ -13,11 +14,19 @@ class Base(DeclarativeBase):
 class Item(Base):
     __tablename__ = "items"
     id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
-    type: Mapped[str] = mapped_column()
-    brand: Mapped[str] = mapped_column()
     name: Mapped[str] = mapped_column()
+    location: Mapped[str] = mapped_column()
+    cuisine: Mapped[str] = mapped_column()
+    rating: Mapped[int] = mapped_column()
+    price_level: Mapped[int] = mapped_column()
+    review_count: Mapped[int] = mapped_column()
+    hours: Mapped[str] = mapped_column()
+    tags: Mapped[list[str]] = mapped_column(postgresql.ARRAY(VARCHAR))  # Array of strings
     description: Mapped[str] = mapped_column()
-    price: Mapped[float] = mapped_column()
+    menu_summary: Mapped[str] = mapped_column()
+    top_reviews: Mapped[str] = mapped_column()
+    vibe: Mapped[str] = mapped_column()
+
     # Embeddings for different models:
     embedding_3l: Mapped[Vector] = mapped_column(Vector(1024), nullable=True)  # text-embedding-3-large
     embedding_nomic: Mapped[Vector] = mapped_column(Vector(768), nullable=True)  # nomic-embed-text
@@ -33,10 +42,10 @@ def to_dict(self, include_embedding: bool = False):
         return model_dict
 
     def to_str_for_rag(self):
-        return f"Name:{self.name} Description:{self.description} Price:{self.price} Brand:{self.brand} Type:{self.type}"
+        return f"Name:{self.name} Description:{self.description} Location:{self.location} Cuisine:{self.cuisine} Rating:{self.rating} Price Level:{self.price_level} Review Count:{self.review_count} Hours:{self.hours} Tags:{self.tags} Menu Summary:{self.menu_summary} Top Reviews:{self.top_reviews} Vibe:{self.vibe}"  # noqa: E501
 
     def to_str_for_embedding(self):
-        return f"Name: {self.name} Description: {self.description} Type: {self.type}"
+        return f"Name: {self.name} Description: {self.description} Cuisine: {self.cuisine} Tags: {self.tags} Menu Summary: {self.menu_summary} Top Reviews: {self.top_reviews} Vibe: {self.vibe}"  # noqa: E501
 
 
 """
diff --git a/src/backend/fastapi_app/prompts/answer.txt b/src/backend/fastapi_app/prompts/answer.txt
@@ -1,9 +1,9 @@
-Assistant helps customers with questions about products.
-Respond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.
-Answer ONLY with the product details listed in the products.
+Assistant helps PyCon attendees with questions about restaurants.
+Respond as if you are a conference volunteer. Do NOT respond with tables.
+Answer ONLY with the restaurant details listed in the sources.
 If there isn't enough information below, say you don't know.
 Do not generate answers that don't use the sources below.
-Each product has an ID in brackets followed by colon and the product details.
-Always include the product ID for each product you use in the response.
-Use square brackets to reference the source, for example [52].
-Don't combine citations, list each product separately, for example [27][51].
+Each restaurant has an ID in brackets followed by a colon and the restaurant details.
+Always include the restaurant ID for each restaurant you reference in the response.
+Use square brackets to reference the restaurant, for example [52].
+Don't combine references, cite each restaurant separately, for example [27][51].
diff --git a/src/backend/fastapi_app/prompts/query.txt b/src/backend/fastapi_app/prompts/query.txt
@@ -1,5 +1,5 @@
 Your job is to find search results based off the user's question and past messages.
 You have access to only these tools:
-1. **search_database**: This tool allows you to search a table for items based on a query.
+1. **search_database**: This tool allows you to search a table for restaurants based on a query.
   You can pass in a search query and optional filters.
-Once you get the search results, you're done.
+Once you get the search results, you're done.
diff --git a/src/backend/fastapi_app/prompts/query_fewshots.json b/src/backend/fastapi_app/prompts/query_fewshots.json
@@ -2,7 +2,7 @@
   {
     "parts": [
       {
-        "content": "good options for climbing gear that can be used outside?",
+        "content": "good options for ethiopian restaurants?",
         "timestamp": "2025-05-07T19:02:46.977501Z",
         "part_kind": "user-prompt"
       }
@@ -14,7 +14,7 @@
     "parts": [
       {
         "tool_name": "search_database",
-        "args": "{\"search_query\":\"climbing gear outside\"}",
+        "args": "{\"search_query\":\"ethiopian\"}",
         "tool_call_id": "call_4HeBCmo2uioV6CyoePEGyZPc",
         "part_kind": "tool-call"
       }
@@ -27,7 +27,7 @@
     "parts": [
       {
         "tool_name": "search_database",
-        "content": "Search results for climbing gear that can be used outside: ...",
+        "content": "Search results for ethiopian: ...",
         "tool_call_id": "call_4HeBCmo2uioV6CyoePEGyZPc",
         "timestamp": "2025-05-07T19:02:48.242408Z",
         "part_kind": "tool-return"
@@ -39,7 +39,7 @@
   {
     "parts": [
       {
-        "content": "are there any shoes less than $50?",
+        "content": "are there any inexpensive chinese restaurants?",
         "timestamp": "2025-05-07T19:02:46.977501Z",
         "part_kind": "user-prompt"
       }
@@ -51,7 +51,7 @@
     "parts": [
       {
         "tool_name": "search_database",
-        "args": "{\"search_query\":\"shoes\",\"price_filter\":{\"comparison_operator\":\"<\",\"value\":50}}",
+        "args": "{\"search_query\":\"chinese\",\"price_level_filter\":{\"comparison_operator\":\"<\",\"value\":3}}",
         "tool_call_id": "call_4HeBCmo2uioV6CyoePEGyZPc",
         "part_kind": "tool-call"
       }
@@ -64,7 +64,7 @@
     "parts": [
       {
         "tool_name": "search_database",
-        "content": "Search results for shoes cheaper than 50: ...",
+        "content": "Search results for chinese: ...",
         "tool_call_id": "call_4HeBCmo2uioV6CyoePEGyZPc",
         "timestamp": "2025-05-07T19:02:48.242408Z",
         "part_kind": "tool-return"
diff --git a/src/backend/fastapi_app/query_rewriter.py b/src/backend/fastapi_app/query_rewriter.py
@@ -12,39 +12,39 @@ def build_search_function() -> list[ChatCompletionToolParam]:
             "type": "function",
             "function": {
                 "name": "search_database",
-                "description": "Search PostgreSQL database for relevant products based on user query",
+                "description": "Search PostgreSQL database for relevant restaurants based on user query",
                 "parameters": {
                     "type": "object",
                     "properties": {
                         "search_query": {
                             "type": "string",
                             "description": "Query string to use for full text search, e.g. 'red shoes'",
                         },
-                        "price_filter": {
+                        "price_level_filter": {
                             "type": "object",
-                            "description": "Filter search results based on price of the product",
+                            "description": "Filter search results to a certain price level (from 1 $ to 4 $$$$, with 4 being most costly)",  # noqa: E501
                             "properties": {
                                 "comparison_operator": {
                                     "type": "string",
-                                    "description": "Operator to compare the column value, either '>', '<', '>=', '<=', '='",  # noqa
+                                    "description": "Operator to compare the column value, either '>', '<', '>=', '<=', '='",  # noqa: E501
                                 },
                                 "value": {
                                     "type": "number",
-                                    "description": "Value to compare against, e.g. 30",
+                                    "description": "Value to compare against, either 1, 2, 3, 4",
                                 },
                             },
                         },
-                        "brand_filter": {
+                        "rating_filter": {
                             "type": "object",
-                            "description": "Filter search results based on brand of the product",
+                            "description": "Filter search results based on ratings of restaurant (from 1 to 5 stars, with 5 the best)",  # noqa: E501
                             "properties": {
                                 "comparison_operator": {
                                     "type": "string",
-                                    "description": "Operator to compare the column value, either '=' or '!='",
+                                    "description": "Operator to compare the column value, either '>', '<', '>=', '<=', '='",  # noqa: E501
                                 },
                                 "value": {
                                     "type": "string",
-                                    "description": "Value to compare against, e.g. AirStrider",
+                                    "description": "Value to compare against, either 0 1 2 3 4 5",
                                 },
                             },
                         },
@@ -69,22 +69,26 @@ def extract_search_arguments(original_user_query: str, chat_completion: ChatComp
                 arg = json.loads(function.arguments)
                 # Even though its required, search_query is not always specified
                 search_query = arg.get("search_query", original_user_query)
-                if "price_filter" in arg and arg["price_filter"] and isinstance(arg["price_filter"], dict):
-                    price_filter = arg["price_filter"]
+                if (
+                    "price_level_filter" in arg
+                    and arg["price_level_filter"]
+                    and isinstance(arg["price_level_filter"], dict)
+                ):
+                    price_level_filter = arg["price_level_filter"]
                     filters.append(
                         {
-                            "column": "price",
-                            "comparison_operator": price_filter["comparison_operator"],
-                            "value": price_filter["value"],
+                            "column": "price_level",
+                            "comparison_operator": price_level_filter["comparison_operator"],
+                            "value": price_level_filter["value"],
                         }
                     )
-                if "brand_filter" in arg and arg["brand_filter"] and isinstance(arg["brand_filter"], dict):
-                    brand_filter = arg["brand_filter"]
+                if "rating_filter" in arg and arg["rating_filter"] and isinstance(arg["rating_filter"], dict):
+                    rating_filter = arg["rating_filter"]
                     filters.append(
                         {
-                            "column": "brand",
-                            "comparison_operator": brand_filter["comparison_operator"],
-                            "value": brand_filter["value"],
+                            "column": "rating",
+                            "comparison_operator": rating_filter["comparison_operator"],
+                            "value": rating_filter["value"],
                         }
                     )
     elif query_text := response_message.content:
diff --git a/src/backend/fastapi_app/rag_advanced.py b/src/backend/fastapi_app/rag_advanced.py
@@ -11,13 +11,13 @@
 
 from fastapi_app.api_models import (
     AIChatRoles,
-    BrandFilter,
     ChatRequestOverrides,
     Filter,
     ItemPublic,
     Message,
-    PriceFilter,
+    PriceLevelFilter,
     RAGContext,
+    RatingFilter,
     RetrievalResponse,
     RetrievalResponseDelta,
     SearchResults,
@@ -75,8 +75,8 @@ async def search_database(
         self,
         ctx: RunContext[ChatParams],
         search_query: str,
-        price_filter: Optional[PriceFilter] = None,
-        brand_filter: Optional[BrandFilter] = None,
+        price_filter: Optional[PriceLevelFilter] = None,
+        brand_filter: Optional[RatingFilter] = None,
     ) -> SearchResults:
         """
         Search PostgreSQL database for relevant products based on user query
diff --git a/src/backend/fastapi_app/seed_data.json b/src/backend/fastapi_app/seed_data.json
diff --git a/src/frontend/src/components/Answer/Answer.tsx b/src/frontend/src/components/Answer/Answer.tsx
diff --git a/src/frontend/src/components/Example/ExampleList.tsx b/src/frontend/src/components/Example/ExampleList.tsx

Original file line number	Diff line number	Diff line change
`@@ -2,7 +2,7 @@`
`2`	`2`	`{`
`3`	`3`	`"parts": [`
`4`	`4`	`{`
`5`		`- "content": "good options for climbing gear that can be used outside?",`
	`5`	`+ "content": "good options for ethiopian restaurants?",`
`6`	`6`	`"timestamp": "2025-05-07T19:02:46.977501Z",`
`7`	`7`	`"part_kind": "user-prompt"`
`8`	`8`	`}`
`@@ -14,7 +14,7 @@`
`14`	`14`	`"parts": [`
`15`	`15`	`{`
`16`	`16`	`"tool_name": "search_database",`
`17`		`- "args": "{\"search_query\":\"climbing gear outside\"}",`
	`17`	`+ "args": "{\"search_query\":\"ethiopian\"}",`
`18`	`18`	`"tool_call_id": "call_4HeBCmo2uioV6CyoePEGyZPc",`
`19`	`19`	`"part_kind": "tool-call"`
`20`	`20`	`}`
`@@ -27,7 +27,7 @@`
`27`	`27`	`"parts": [`
`28`	`28`	`{`
`29`	`29`	`"tool_name": "search_database",`
`30`		`- "content": "Search results for climbing gear that can be used outside: ...",`
	`30`	`+ "content": "Search results for ethiopian: ...",`
`31`	`31`	`"tool_call_id": "call_4HeBCmo2uioV6CyoePEGyZPc",`
`32`	`32`	`"timestamp": "2025-05-07T19:02:48.242408Z",`
`33`	`33`	`"part_kind": "tool-return"`
`@@ -39,7 +39,7 @@`
`39`	`39`	`{`
`40`	`40`	`"parts": [`
`41`	`41`	`{`
`42`		`- "content": "are there any shoes less than $50?",`
	`42`	`+ "content": "are there any inexpensive chinese restaurants?",`
`43`	`43`	`"timestamp": "2025-05-07T19:02:46.977501Z",`
`44`	`44`	`"part_kind": "user-prompt"`
`45`	`45`	`}`
`@@ -51,7 +51,7 @@`
`51`	`51`	`"parts": [`
`52`	`52`	`{`
`53`	`53`	`"tool_name": "search_database",`
`54`		`- "args": "{\"search_query\":\"shoes\",\"price_filter\":{\"comparison_operator\":\"<\",\"value\":50}}",`
	`54`	`+ "args": "{\"search_query\":\"chinese\",\"price_level_filter\":{\"comparison_operator\":\"<\",\"value\":3}}",`
`55`	`55`	`"tool_call_id": "call_4HeBCmo2uioV6CyoePEGyZPc",`
`56`	`56`	`"part_kind": "tool-call"`
`57`	`57`	`}`
`@@ -64,7 +64,7 @@`
`64`	`64`	`"parts": [`
`65`	`65`	`{`
`66`	`66`	`"tool_name": "search_database",`
`67`		`- "content": "Search results for shoes cheaper than 50: ...",`
	`67`	`+ "content": "Search results for chinese: ...",`
`68`	`68`	`"tool_call_id": "call_4HeBCmo2uioV6CyoePEGyZPc",`
`69`	`69`	`"timestamp": "2025-05-07T19:02:48.242408Z",`
`70`	`70`	`"part_kind": "tool-return"`