Skip to content

Commit cccbd1d

Browse files
Update README.md (#209)
1 parent 66316d1 commit cccbd1d

File tree

12 files changed

+90602
-181912
lines changed

12 files changed

+90602
-181912
lines changed

README.md

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,24 @@
1-
# RAG on PostgreSQL
1+
# RAG on PostgreSQL (PyCon US 2025 Demo!)
2+
3+
> This repository is used in the Microsoft sponsor session at PyCon US 2025 by Rohit Ganguly and Pamela Fox.
4+
> This repository is a fork of Pamela's original, [Azure-Samples/rag-postgres-openai-python](https://github.com/Azure-Samples/rag-postgres-openai-python) — visit it to see the upstream version!
5+
6+
## Resource links used in the presentation:
7+
8+
### VS Code Extensions
9+
- [Python](https://marketplace.visualstudio.com/items?itemName=ms-python.python)
10+
- [GitHub Copilot](https://marketplace.visualstudio.com/items?itemName=GitHub.copilot)
11+
- [GitHub Copilot for Azure](https://marketplace.visualstudio.com/items?itemName=ms-azuretools.vscode-azure-github-copilot)
12+
- [Dev Containers](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers)
13+
- [The (new!) PostgreSQL Extension (link coming soon!)](#)
14+
- [AI Toolkit](https://marketplace.visualstudio.com/items?itemName=ms-windows-ai-studio.windows-ai-studio)
15+
16+
### Azure Services & Tools
17+
- [Azure Developer CLI](https://aka.ms/azd)
18+
- [Azure Container Apps](https://aka.ms/acadocs)
19+
- [Azure Database for PostgreSQL](https://aka.ms/postgresdocs)
20+
- [Azure AI Foundry](https://aka.ms/aifoundrydocs)
21+
222

323
[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/Azure-Samples/rag-postgres-openai-python)
424
[![Open in Dev Containers](https://img.shields.io/static/v1?style=for-the-badge&label=Dev%20Containers&message=Open&color=blue&logo=visualstudiocode)](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/azure-samples/rag-postgres-openai-python)

convert_csv_json.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
"""Convert ``data.csv`` into ``data.json``, coercing cell text to Python values.

Each CSV row becomes one JSON object keyed by the header row. Cells that look
like lists, booleans or numbers are converted; everything else stays a string.
The ``is_open`` column is dropped from every record.
"""

import ast
import csv
import json

# Columns removed from every record before it is written out.
DROPPED_COLUMNS = ("is_open",)


def parse_list_literal(text):
    """Parse a ``[...]`` cell into a Python object, or return it unchanged.

    The text is first read as JSON with single quotes swapped for double
    quotes, then as a Python literal. If neither parse succeeds the original
    string is returned so no data is lost.
    """
    try:
        return json.loads(text.replace("'", '"'))
    except ValueError:
        # json.JSONDecodeError is a ValueError; fall through to the Python parser.
        pass
    try:
        return ast.literal_eval(text)
    except (ValueError, SyntaxError):
        return text


def coerce_value(raw):
    """Convert one CSV cell to a list, bool, int or float when it looks like one.

    Args:
        raw: The cell text as produced by ``csv.reader``.

    Returns:
        The converted value, or the stripped string when no rule applies.
    """
    value = raw.strip()
    if value.startswith("[") and value.endswith("]"):
        return parse_list_literal(value)

    lowered = value.lower()
    if lowered == "true":
        return True
    if lowered == "false":
        return False

    # isdecimal() (not isdigit()) is used because isdigit() also accepts
    # characters such as "²" that int() rejects with ValueError.
    if value.isdecimal():
        return int(value)
    # Removing at most one "." leaves only digits for "1.5", ".5" and "5.".
    if value.replace(".", "", 1).isdecimal():
        return float(value)
    return value


def rows_to_records(header, rows, dropped=DROPPED_COLUMNS):
    """Build one dict per row, keyed by ``header``, with ``dropped`` keys removed.

    Args:
        header: Column names from the first CSV row.
        rows: Iterable of rows, each a sequence of cell strings.
        dropped: Column names to leave out of every record.

    Returns:
        A list of dicts. Rows shorter than the header simply lack the trailing
        keys; cells beyond the header are ignored.
    """
    records = []
    for row in rows:
        # zip() stops at the shorter sequence, which covers both short rows
        # and rows with more cells than the header.
        record = {column: coerce_value(cell) for column, cell in zip(header, row)}
        for column in dropped:
            # pop() with a default: a short row or a CSV without this column
            # must not abort the whole conversion with KeyError.
            record.pop(column, None)
        records.append(record)
    return records


def main(csv_path="data.csv", json_path="data.json"):
    """Read ``csv_path``, write the converted records to ``json_path``.

    Returns:
        The number of records written.
    """
    with open(csv_path, encoding="utf-8") as csv_file:
        csv_reader = csv.reader(csv_file, quoting=csv.QUOTE_ALL, doublequote=True, escapechar="\\")
        # An empty file has no header row; treat it as zero records instead of
        # letting next() raise StopIteration.
        header = next(csv_reader, None)
        json_data = rows_to_records(header, csv_reader) if header is not None else []

    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(json_data, f, indent=4, ensure_ascii=False)

    print(f"Successfully converted CSV data to JSON format with {len(json_data)} records")
    return len(json_data)


if __name__ == "__main__":
    main()

src/backend/fastapi_app/api_models.py

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,21 @@ class ChatRequest(BaseModel):
4444

4545
class ItemPublic(BaseModel):
4646
id: int
47-
type: str
48-
brand: str
4947
name: str
48+
location: str
49+
cuisine: str
50+
rating: int
51+
price_level: int
52+
review_count: int
53+
hours: str
54+
tags: list[str]
5055
description: str
51-
price: float
56+
menu_summary: str
57+
top_reviews: str
58+
vibe: str
5259

5360
def to_str_for_rag(self):
54-
return f"Name:{self.name} Description:{self.description} Price:{self.price} Brand:{self.brand} Type:{self.type}"
61+
return f"Name:{self.name} Description:{self.description} Location:{self.location} Cuisine:{self.cuisine} Rating:{self.rating} Price Level:{self.price_level} Review Count:{self.review_count} Hours:{self.hours} Tags:{self.tags} Menu Summary:{self.menu_summary} Top Reviews:{self.top_reviews} Vibe:{self.vibe}" # noqa: E501
5562

5663

5764
class ItemWithDistance(ItemPublic):
@@ -105,16 +112,18 @@ class Filter(BaseModel):
105112
value: Any
106113

107114

108-
class PriceFilter(Filter):
109-
column: str = Field(default="price", description="The column to filter on (always 'price' for this filter)")
110-
comparison_operator: str = Field(description="The operator for price comparison ('>', '<', '>=', '<=', '=')")
111-
value: float = Field(description="The price value to compare against (e.g., 30.00)")
115+
class PriceLevelFilter(Filter):
    # Filter that targets the `price_level` column.
    # Documented with `#` comments rather than a docstring so nothing new is
    # attached to the class at runtime.

    # Column name; defaults to "price_level".
    column: str = Field(
        description="The column to filter on (always 'price_level' for this filter)",
        default="price_level",
    )
    # Comparison operator; the description lists '>', '<', '>=', '<=' and '='.
    comparison_operator: str = Field(
        description="The operator for price level comparison ('>', '<', '>=', '<=', '=')",
    )
    # Right-hand side of the comparison; declared float, described as 1 to 4.
    value: float = Field(
        description="Value to compare against, either 1, 2, 3, 4",
    )
112121

113122

114-
class BrandFilter(Filter):
115-
column: str = Field(default="brand", description="The column to filter on (always 'brand' for this filter)")
116-
comparison_operator: str = Field(description="The operator for brand comparison ('=' or '!=')")
117-
value: str = Field(description="The brand name to compare against (e.g., 'AirStrider')")
123+
class RatingFilter(Filter):
    # Filter that targets the `rating` column.
    # Documented with `#` comments rather than a docstring so nothing new is
    # attached to the class at runtime.

    # Column name; defaults to "rating".
    column: str = Field(default="rating", description="The column to filter on (always 'rating' for this filter)")
    # Comparison operator; the description lists '>', '<', '>=', '<=' and '='.
    comparison_operator: str = Field(description="The operator for rating comparison ('>', '<', '>=', '<=', '=')")
    # The description lists 0 through 5 so the top rating can be requested;
    # the rating_filter tool schema in query_rewriter.py describes ratings as
    # running up to 5 stars and lists the same values.
    # NOTE(review): value is declared str although ItemPublic.rating is an
    # int — confirm the query builder compares it numerically.
    value: str = Field(description="Value to compare against, either 0 1 2 3 4 5")
118127

119128

120129
class SearchResults(BaseModel):

src/backend/fastapi_app/postgres_models.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
from __future__ import annotations
22

33
from pgvector.sqlalchemy import Vector
4-
from sqlalchemy import Index
4+
from sqlalchemy import VARCHAR, Index
5+
from sqlalchemy.dialects import postgresql
56
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
67

78

@@ -13,11 +14,19 @@ class Base(DeclarativeBase):
1314
class Item(Base):
1415
__tablename__ = "items"
1516
id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
16-
type: Mapped[str] = mapped_column()
17-
brand: Mapped[str] = mapped_column()
1817
name: Mapped[str] = mapped_column()
18+
location: Mapped[str] = mapped_column()
19+
cuisine: Mapped[str] = mapped_column()
20+
rating: Mapped[int] = mapped_column()
21+
price_level: Mapped[int] = mapped_column()
22+
review_count: Mapped[int] = mapped_column()
23+
hours: Mapped[str] = mapped_column()
24+
tags: Mapped[list[str]] = mapped_column(postgresql.ARRAY(VARCHAR)) # Array of strings
1925
description: Mapped[str] = mapped_column()
20-
price: Mapped[float] = mapped_column()
26+
menu_summary: Mapped[str] = mapped_column()
27+
top_reviews: Mapped[str] = mapped_column()
28+
vibe: Mapped[str] = mapped_column()
29+
2130
# Embeddings for different models:
2231
embedding_3l: Mapped[Vector] = mapped_column(Vector(1024), nullable=True) # text-embedding-3-large
2332
embedding_nomic: Mapped[Vector] = mapped_column(Vector(768), nullable=True) # nomic-embed-text
@@ -33,10 +42,10 @@ def to_dict(self, include_embedding: bool = False):
3342
return model_dict
3443

3544
def to_str_for_rag(self):
36-
return f"Name:{self.name} Description:{self.description} Price:{self.price} Brand:{self.brand} Type:{self.type}"
45+
return f"Name:{self.name} Description:{self.description} Location:{self.location} Cuisine:{self.cuisine} Rating:{self.rating} Price Level:{self.price_level} Review Count:{self.review_count} Hours:{self.hours} Tags:{self.tags} Menu Summary:{self.menu_summary} Top Reviews:{self.top_reviews} Vibe:{self.vibe}" # noqa: E501
3746

3847
def to_str_for_embedding(self):
39-
return f"Name: {self.name} Description: {self.description} Type: {self.type}"
48+
return f"Name: {self.name} Description: {self.description} Cuisine: {self.cuisine} Tags: {self.tags} Menu Summary: {self.menu_summary} Top Reviews: {self.top_reviews} Vibe: {self.vibe}" # noqa: E501
4049

4150

4251
"""
Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
Assistant helps customers with questions about products.
2-
Respond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.
3-
Answer ONLY with the product details listed in the products.
1+
Assistant helps PyCon attendees with questions about restaurants.
2+
Respond as if you are a conference volunteer. Do NOT respond with tables.
3+
Answer ONLY with the restaurant details listed in the sources.
44
If there isn't enough information below, say you don't know.
55
Do not generate answers that don't use the sources below.
6-
Each product has an ID in brackets followed by colon and the product details.
7-
Always include the product ID for each product you use in the response.
8-
Use square brackets to reference the source, for example [52].
9-
Don't combine citations, list each product separately, for example [27][51].
6+
Each restaurant has an ID in brackets followed by colon and the restaurant details.
7+
Always include the restaurant ID for each restaurant you reference in the response.
8+
Use square brackets to reference the restaurant, for example [52].
9+
Don't combine references, cite each restaurant separately, for example [27][51].
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
Your job is to find search results based off the user's question and past messages.
22
You have access to only these tools:
3-
1. **search_database**: This tool allows you to search a table for items based on a query.
3+
1. **search_database**: This tool allows you to search a table for restaurants based on a query.
44
You can pass in a search query and optional filters.
5-
Once you get the search results, you're done.
5+
Once you get the search results, you're done.

src/backend/fastapi_app/prompts/query_fewshots.json

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
{
33
"parts": [
44
{
5-
"content": "good options for climbing gear that can be used outside?",
5+
"content": "good options for ethiopian restaurants?",
66
"timestamp": "2025-05-07T19:02:46.977501Z",
77
"part_kind": "user-prompt"
88
}
@@ -14,7 +14,7 @@
1414
"parts": [
1515
{
1616
"tool_name": "search_database",
17-
"args": "{\"search_query\":\"climbing gear outside\"}",
17+
"args": "{\"search_query\":\"ethiopian\"}",
1818
"tool_call_id": "call_4HeBCmo2uioV6CyoePEGyZPc",
1919
"part_kind": "tool-call"
2020
}
@@ -27,7 +27,7 @@
2727
"parts": [
2828
{
2929
"tool_name": "search_database",
30-
"content": "Search results for climbing gear that can be used outside: ...",
30+
"content": "Search results for ethiopian: ...",
3131
"tool_call_id": "call_4HeBCmo2uioV6CyoePEGyZPc",
3232
"timestamp": "2025-05-07T19:02:48.242408Z",
3333
"part_kind": "tool-return"
@@ -39,7 +39,7 @@
3939
{
4040
"parts": [
4141
{
42-
"content": "are there any shoes less than $50?",
42+
"content": "are there any inexpensive chinese restaurants?",
4343
"timestamp": "2025-05-07T19:02:46.977501Z",
4444
"part_kind": "user-prompt"
4545
}
@@ -51,7 +51,7 @@
5151
"parts": [
5252
{
5353
"tool_name": "search_database",
54-
"args": "{\"search_query\":\"shoes\",\"price_filter\":{\"comparison_operator\":\"<\",\"value\":50}}",
54+
"args": "{\"search_query\":\"chinese\",\"price_level_filter\":{\"comparison_operator\":\"<\",\"value\":3}}",
5555
"tool_call_id": "call_4HeBCmo2uioV6CyoePEGyZPc",
5656
"part_kind": "tool-call"
5757
}
@@ -64,7 +64,7 @@
6464
"parts": [
6565
{
6666
"tool_name": "search_database",
67-
"content": "Search results for shoes cheaper than 50: ...",
67+
"content": "Search results for chinese: ...",
6868
"tool_call_id": "call_4HeBCmo2uioV6CyoePEGyZPc",
6969
"timestamp": "2025-05-07T19:02:48.242408Z",
7070
"part_kind": "tool-return"

src/backend/fastapi_app/query_rewriter.py

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -12,39 +12,39 @@ def build_search_function() -> list[ChatCompletionToolParam]:
1212
"type": "function",
1313
"function": {
1414
"name": "search_database",
15-
"description": "Search PostgreSQL database for relevant products based on user query",
15+
"description": "Search PostgreSQL database for relevant restaurants based on user query",
1616
"parameters": {
1717
"type": "object",
1818
"properties": {
1919
"search_query": {
2020
"type": "string",
2121
"description": "Query string to use for full text search, e.g. 'red shoes'",
2222
},
23-
"price_filter": {
23+
"price_level_filter": {
2424
"type": "object",
25-
"description": "Filter search results based on price of the product",
25+
"description": "Filter search results to a certain price level (from 1 $ to 4 $$$$, with 4 being most costly)", # noqa: E501
2626
"properties": {
2727
"comparison_operator": {
2828
"type": "string",
29-
"description": "Operator to compare the column value, either '>', '<', '>=', '<=', '='", # noqa
29+
"description": "Operator to compare the column value, either '>', '<', '>=', '<=', '='", # noqa: E501
3030
},
3131
"value": {
3232
"type": "number",
33-
"description": "Value to compare against, e.g. 30",
33+
"description": "Value to compare against, either 1, 2, 3, 4",
3434
},
3535
},
3636
},
37-
"brand_filter": {
37+
"rating_filter": {
3838
"type": "object",
39-
"description": "Filter search results based on brand of the product",
39+
"description": "Filter search results based on ratings of restaurant (from 1 to 5 stars, with 5 the best)", # noqa: E501
4040
"properties": {
4141
"comparison_operator": {
4242
"type": "string",
43-
"description": "Operator to compare the column value, either '=' or '!='",
43+
"description": "Operator to compare the column value, either '>', '<', '>=', '<=', '='", # noqa: E501
4444
},
4545
"value": {
4646
"type": "string",
47-
"description": "Value to compare against, e.g. AirStrider",
47+
"description": "Value to compare against, either 0 1 2 3 4 5",
4848
},
4949
},
5050
},
@@ -69,22 +69,26 @@ def extract_search_arguments(original_user_query: str, chat_completion: ChatComp
6969
arg = json.loads(function.arguments)
7070
# Even though its required, search_query is not always specified
7171
search_query = arg.get("search_query", original_user_query)
72-
if "price_filter" in arg and arg["price_filter"] and isinstance(arg["price_filter"], dict):
73-
price_filter = arg["price_filter"]
72+
if (
73+
"price_level_filter" in arg
74+
and arg["price_level_filter"]
75+
and isinstance(arg["price_level_filter"], dict)
76+
):
77+
price_level_filter = arg["price_level_filter"]
7478
filters.append(
7579
{
76-
"column": "price",
77-
"comparison_operator": price_filter["comparison_operator"],
78-
"value": price_filter["value"],
80+
"column": "price_level",
81+
"comparison_operator": price_level_filter["comparison_operator"],
82+
"value": price_level_filter["value"],
7983
}
8084
)
81-
if "brand_filter" in arg and arg["brand_filter"] and isinstance(arg["brand_filter"], dict):
82-
brand_filter = arg["brand_filter"]
85+
if "rating_filter" in arg and arg["rating_filter"] and isinstance(arg["rating_filter"], dict):
86+
rating_filter = arg["rating_filter"]
8387
filters.append(
8488
{
85-
"column": "brand",
86-
"comparison_operator": brand_filter["comparison_operator"],
87-
"value": brand_filter["value"],
89+
"column": "rating",
90+
"comparison_operator": rating_filter["comparison_operator"],
91+
"value": rating_filter["value"],
8892
}
8993
)
9094
elif query_text := response_message.content:

src/backend/fastapi_app/rag_advanced.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,13 @@
1111

1212
from fastapi_app.api_models import (
1313
AIChatRoles,
14-
BrandFilter,
1514
ChatRequestOverrides,
1615
Filter,
1716
ItemPublic,
1817
Message,
19-
PriceFilter,
18+
PriceLevelFilter,
2019
RAGContext,
20+
RatingFilter,
2121
RetrievalResponse,
2222
RetrievalResponseDelta,
2323
SearchResults,
@@ -75,8 +75,8 @@ async def search_database(
7575
self,
7676
ctx: RunContext[ChatParams],
7777
search_query: str,
78-
price_filter: Optional[PriceFilter] = None,
79-
brand_filter: Optional[BrandFilter] = None,
78+
price_filter: Optional[PriceLevelFilter] = None,
79+
brand_filter: Optional[RatingFilter] = None,
8080
) -> SearchResults:
8181
"""
8282
Search PostgreSQL database for relevant products based on user query

0 commit comments

Comments
 (0)