Skip to content

Add playwright tests #4

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .devcontainer/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ services:
POSTGRES_USER: admin
POSTGRES_PASSWORD: postgres

ports:
- "5432:5432"
# For local development, we need to forward the database port here too.
# Add "forwardPorts": ["5432"] to **devcontainer.json** to forward PostgreSQL locally.
# (Adding the "ports" property to this file will not forward from a Codespace.)

Expand Down
14 changes: 13 additions & 1 deletion .github/workflows/app-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,4 +73,16 @@ jobs:
- name: Run MyPy
run: python3 -m mypy .
- name: Run Pytest
run: python3 -m pytest
run: python3 -m pytest -s -vv --cov --cov-fail-under=85
- name: Run E2E tests with Playwright
id: e2e
if: runner.os != 'Windows'
run: |
playwright install chromium --with-deps
python3 -m pytest tests/e2e.py --tracing=retain-on-failure
- name: Upload test artifacts
if: ${{ failure() && steps.e2e.conclusion == 'failure' }}
uses: actions/upload-artifact@v4
with:
name: playwright-traces${{ matrix.python_version }}
path: test-results
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -146,3 +146,5 @@ npm-debug.log*
node_modules
static/

# Playwright test trace
test-results/
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ python_version = 3.12
exclude = [".venv/*"]

[tool.pytest.ini_options]
addopts = "-ra --cov"
addopts = "-ra"
testpaths = ["tests"]
pythonpath = ['src']
pythonpath = ['src/backend']
filterwarnings = ["ignore::DeprecationWarning"]

[[tool.mypy.overrides]]
Expand Down
6 changes: 4 additions & 2 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
-r src/backend/requirements.txt
ruff
mypy
types-requests
pre-commit
pip-tools
pip-compile-cross-platform
playwright
pytest
pytest-cov
pytest-asyncio
pytest-cov
pytest-playwright
pytest-snapshot
mypy
locust
5 changes: 3 additions & 2 deletions src/backend/fastapi_app/api_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class ChatRequestContext(BaseModel):
class ChatRequest(BaseModel):
messages: list[ChatCompletionMessageParam]
context: ChatRequestContext
sessionState: Any | None = None


class ThoughtStep(BaseModel):
Expand All @@ -54,13 +55,13 @@ class RAGContext(BaseModel):
class RetrievalResponse(BaseModel):
message: Message
context: RAGContext
session_state: Any | None = None
sessionState: Any | None = None


class RetrievalResponseDelta(BaseModel):
delta: Message | None = None
context: RAGContext | None = None
session_state: Any | None = None
sessionState: Any | None = None


class ItemPublic(BaseModel):
Expand Down
3 changes: 2 additions & 1 deletion src/frontend/src/pages/chat/Chat.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,8 @@ const Chat = () => {
prompt_template: promptTemplate.length === 0 ? undefined : promptTemplate,
temperature: temperature
}
}
},
sessionState: answers.length ? answers[answers.length - 1][1].sessionState : null
};
const chatClient: AIChatProtocolClient = new AIChatProtocolClient("/chat");
if (shouldStream) {
Expand Down
192 changes: 192 additions & 0 deletions tests/e2e.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
import socket
import time
from collections.abc import Generator
from contextlib import closing
from multiprocessing import Process

import pytest
import requests
import uvicorn
from playwright.sync_api import Page, Route, expect

import fastapi_app as app

# Give every Playwright `expect` assertion in the test run up to 10 seconds
# (Playwright timeouts are expressed in milliseconds).
expect.set_options(timeout=10_000)


def wait_for_server_ready(url: str, timeout: float = 10.0, check_interval: float = 0.5) -> bool:
    """Poll ``url`` with HTTP GET until a request completes without a connection error.

    Args:
        url: Address to request.
        timeout: Approximate total time budget in seconds; together with
            ``check_interval`` it determines the number of attempts.
        check_interval: Pause in seconds after each failed attempt.

    Returns:
        True as soon as one request completes (any HTTP status counts).

    Raises:
        RuntimeError: If every attempt failed. The last connection error is
            included in the message and chained as the cause.
    """
    last_error: Exception | None = None
    for _ in range(int(timeout / check_interval)):
        try:
            # Bound each request: without a timeout, a server that accepts the
            # connection but never answers would block the test run forever.
            requests.get(url, timeout=timeout)
        except (requests.ConnectionError, requests.Timeout) as exc:
            last_error = exc
            time.sleep(check_interval)
        else:
            return True
    raise RuntimeError(f"Server at {url} was not ready after {timeout} seconds: {last_error}") from last_error


@pytest.fixture(scope="session")
def free_port() -> int:
    """Return a TCP port number that was free when probed, for the test server to bind."""
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    with closing(probe):
        # Binding to port 0 asks the OS to pick an unused port for us.
        probe.bind(("", 0))
        probe.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        # For AF_INET sockets the bound address is (host, port); only the port is needed.
        port = probe.getsockname()[1]
        return port


def run_server(port: int):
    """Build the app in testing mode and serve it with uvicorn on ``port``.

    Used as the target of the child process started by the live-server fixture.
    """
    test_app = app.create_app(testing=True)
    uvicorn.run(test_app, port=port)


@pytest.fixture()
def live_server_url(mock_session_env, free_port: int) -> Generator[str, None, None]:
    """Run the app in a child process for one test and yield its base URL.

    Args:
        mock_session_env: Fixture requested for its side effects only; it is
            not used directly here.
        free_port: Port number the server should listen on.

    Yields:
        The ``http://localhost:<port>/`` URL once the server answers requests.

    Raises:
        RuntimeError: Propagated from ``wait_for_server_ready`` if the server
            never becomes reachable.
    """
    proc = Process(target=run_server, args=(free_port,), daemon=True)
    proc.start()
    url = f"http://localhost:{free_port}/"
    try:
        wait_for_server_ready(url, timeout=10.0, check_interval=0.5)
        yield url
    finally:
        # Always stop the child, including when the readiness wait fails.
        proc.kill()
        # Reap the process before returning: the port comes from a
        # session-scoped fixture, so the next test starts a server on the
        # same port and must not race with one that is still shutting down.
        proc.join(timeout=5)


@pytest.fixture(params=[(480, 800), (600, 1024), (768, 1024), (992, 1024), (1024, 768)])
def sized_page(page: Page, request):
    """Yield the Playwright page resized to each parametrized (width, height) viewport."""
    width, height = request.param
    viewport = {"width": width, "height": height}
    page.set_viewport_size(viewport)
    yield page


def test_home(page: Page, live_server_url: str):
    """The page served at the live server's root has the expected title."""
    page.goto(live_server_url)
    page_assertions = expect(page)
    page_assertions.to_have_title("RAG on PostgreSQL")


def test_chat(sized_page: Page, live_server_url: str):
    """Ask a question with the streaming endpoint mocked, inspect the answer, then clear the chat.

    Runs once per viewport size supplied by the ``sized_page`` fixture.
    """
    page = sized_page

    # Set up a mock route to the /chat/stream endpoint with streaming results
    def handle(route: Route):
        # Assert that sessionState is specified in the request (None for now)
        payload = route.request.post_data_json
        if payload:
            session_state = payload["sessionState"]
            assert session_state is None
        # Read the JSONL from our snapshot results and return as the response.
        # The context manager closes the file even if the read fails.
        with open(
            "tests/snapshots/test_api_routes/test_advanced_chat_streaming_flow/advanced_chat_streaming_flow_response.jsonlines",
            encoding="utf-8",
        ) as f:
            jsonl = f.read()
        route.fulfill(body=jsonl, status=200, headers={"Transfer-encoding": "Chunked"})

    page.route("*/**/chat/stream", handle)

    # Check initial page state
    page.goto(live_server_url)
    expect(page).to_have_title("RAG on PostgreSQL")
    expect(page.get_by_role("heading", name="Product chat")).to_be_visible()
    expect(page.get_by_role("button", name="Clear chat")).to_be_disabled()
    expect(page.get_by_role("button", name="Developer settings")).to_be_enabled()

    # Ask a question and wait for the message to appear
    question_box = page.get_by_placeholder("Type a new question (e.g. does my plan cover annual eye exams?)")
    question_box.click()
    question_box.fill("Whats the dental plan?")
    page.get_by_role("button", name="Ask question button").click()

    expect(page.get_by_text("Whats the dental plan?")).to_be_visible()
    expect(page.get_by_text("The capital of France is Paris.")).to_be_visible()
    expect(page.get_by_role("button", name="Clear chat")).to_be_enabled()

    # Show the thought process
    page.get_by_label("Show thought process").click()
    expect(page.get_by_title("Thought process")).to_be_visible()
    expect(page.get_by_text("Prompt to generate search arguments")).to_be_visible()

    # Clear the chat
    page.get_by_role("button", name="Clear chat").click()
    expect(page.get_by_text("Whats the dental plan?")).not_to_be_visible()
    expect(page.get_by_text("The capital of France is Paris.")).not_to_be_visible()
    expect(page.get_by_role("button", name="Clear chat")).to_be_disabled()


def test_chat_customization(page: Page, live_server_url: str):
    """Change the developer settings and check that they reach the /chat request as overrides."""

    # Set up a mock route to the /chat endpoint
    def handle(route: Route):
        payload = route.request.post_data_json
        if payload:
            overrides = payload["context"]["overrides"]
            assert overrides["use_advanced_flow"] is False
            assert overrides["retrieval_mode"] == "vectors"
            assert overrides["top"] == 1
            assert overrides["prompt_template"] == "You are a cat and only talk about tuna."

        # Read the JSON from our snapshot results and return as the response.
        # The context manager closes the file even if the read fails.
        with open(
            "tests/snapshots/test_api_routes/test_simple_chat_flow/simple_chat_flow_response.json",
            encoding="utf-8",
        ) as f:
            response_body = f.read()
        route.fulfill(body=response_body, status=200)

    page.route("*/**/chat", handle)

    # Check initial page state
    page.goto(live_server_url)
    expect(page).to_have_title("RAG on PostgreSQL")

    # Customize all the settings
    page.get_by_role("button", name="Developer settings").click()
    page.get_by_text(
        "Use advanced flow with query rewriting and filter formulation. Not compatible with Ollama models."
    ).click()
    page.get_by_label("Retrieve this many matching rows:").click()
    page.get_by_label("Retrieve this many matching rows:").fill("1")
    page.get_by_text("Vectors + Text (Hybrid)").click()
    page.get_by_role("option", name="Vectors", exact=True).click()
    page.get_by_label("Override prompt template").click()
    page.get_by_label("Override prompt template").fill("You are a cat and only talk about tuna.")

    # NOTE(review): presumably this toggles streaming off so the request goes
    # to the mocked /chat route rather than /chat/stream; confirm against the UI.
    page.get_by_text("Stream chat completion responses").click()
    page.locator("button").filter(has_text="Close").click()

    # Ask a question and wait for the message to appear
    question_box = page.get_by_placeholder("Type a new question (e.g. does my plan cover annual eye exams?)")
    question_box.click()
    question_box.fill("Whats the dental plan?")
    page.get_by_role("button", name="Ask question button").click()

    expect(page.get_by_text("Whats the dental plan?")).to_be_visible()
    expect(page.get_by_text("The capital of France is Paris.")).to_be_visible()
    expect(page.get_by_role("button", name="Clear chat")).to_be_enabled()


def test_chat_nonstreaming(page: Page, live_server_url: str):
    """Ask a question after toggling the streaming setting, with the /chat endpoint mocked."""

    # Set up a mock route to the /chat endpoint
    def handle(route: Route):
        # Read the JSON from our snapshot results and return as the response.
        # The context manager closes the file even if the read fails.
        with open(
            "tests/snapshots/test_api_routes/test_advanced_chat_flow/advanced_chat_flow_response.json",
            encoding="utf-8",
        ) as f:
            response_body = f.read()
        route.fulfill(body=response_body, status=200)

    page.route("*/**/chat", handle)

    # Check initial page state
    page.goto(live_server_url)
    expect(page).to_have_title("RAG on PostgreSQL")
    expect(page.get_by_role("button", name="Developer settings")).to_be_enabled()
    page.get_by_role("button", name="Developer settings").click()
    page.get_by_text("Stream chat completion responses").click()
    page.locator("button").filter(has_text="Close").click()

    # Ask a question and wait for the message to appear
    question_box = page.get_by_placeholder("Type a new question (e.g. does my plan cover annual eye exams?)")
    question_box.click()
    question_box.fill("Whats the dental plan?")
    page.get_by_label("Ask question button").click()

    expect(page.get_by_text("Whats the dental plan?")).to_be_visible()
    expect(page.get_by_text("The capital of France is Paris.")).to_be_visible()
    expect(page.get_by_role("button", name="Clear chat")).to_be_enabled()
Original file line number Diff line number Diff line change
Expand Up @@ -64,5 +64,5 @@
],
"followup_questions": null
},
"session_state": null
"sessionState": null
}
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
{"delta":null,"context":{"data_points":{"1":{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}},"thoughts":[{"title":"Prompt to generate search arguments","description":["{'role': 'system', 'content': 'Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching database rows.\\nYou have access to an Azure PostgreSQL database with an items table that has columns for title, description, brand, price, and type.\\nGenerate a search query based on the conversation and the new question.\\nIf the question is not in English, translate the question to English before generating the search query.\\nIf you cannot generate a search query, return the original user question.\\nDO NOT return anything besides the query.'}","{'role': 'user', 'content': 'What is the capital of France?'}"],"props":{"model":"gpt-35-turbo","deployment":"gpt-35-turbo"}},{"title":"Search using generated search arguments","description":"The capital of France is Paris. [Benefit_Options-2.pdf].","props":{"top":1,"vector_search":true,"text_search":true,"filters":[]}},{"title":"Search results","description":[{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. 
With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}],"props":{}},{"title":"Prompt to generate answer","description":["{'role': 'system', 'content': \"Assistant helps customers with questions about products.\\nRespond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.\\nAnswer ONLY with the product details listed in the products.\\nIf there isn't enough information below, say you don't know.\\nDo not generate answers that don't use the sources below.\\nEach product has an ID in brackets followed by colon and the product details.\\nAlways include the product ID for each product you use in the response.\\nUse square brackets to reference the source, for example [52].\\nDon't combine citations, list each product separately, for example [27][51].\"}","{'role': 'user', 'content': \"What is the capital of France?\\n\\nSources:\\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. Price:109.99 Brand:Daybird Type:Footwear\\n\\n\"}"],"props":{"model":"gpt-35-turbo","deployment":"gpt-35-turbo"}}],"followup_questions":null},"session_state":null}
{"delta":{"content":"The capital of France is Paris. [Benefit_Options-2.pdf].","role":"assistant"},"context":null,"session_state":null}
{"delta":null,"context":{"data_points":{"1":{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}},"thoughts":[{"title":"Prompt to generate search arguments","description":["{'role': 'system', 'content': 'Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching database rows.\\nYou have access to an Azure PostgreSQL database with an items table that has columns for title, description, brand, price, and type.\\nGenerate a search query based on the conversation and the new question.\\nIf the question is not in English, translate the question to English before generating the search query.\\nIf you cannot generate a search query, return the original user question.\\nDO NOT return anything besides the query.'}","{'role': 'user', 'content': 'What is the capital of France?'}"],"props":{"model":"gpt-35-turbo","deployment":"gpt-35-turbo"}},{"title":"Search using generated search arguments","description":"The capital of France is Paris. [Benefit_Options-2.pdf].","props":{"top":1,"vector_search":true,"text_search":true,"filters":[]}},{"title":"Search results","description":[{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. 
With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}],"props":{}},{"title":"Prompt to generate answer","description":["{'role': 'system', 'content': \"Assistant helps customers with questions about products.\\nRespond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.\\nAnswer ONLY with the product details listed in the products.\\nIf there isn't enough information below, say you don't know.\\nDo not generate answers that don't use the sources below.\\nEach product has an ID in brackets followed by colon and the product details.\\nAlways include the product ID for each product you use in the response.\\nUse square brackets to reference the source, for example [52].\\nDon't combine citations, list each product separately, for example [27][51].\"}","{'role': 'user', 'content': \"What is the capital of France?\\n\\nSources:\\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. Price:109.99 Brand:Daybird Type:Footwear\\n\\n\"}"],"props":{"model":"gpt-35-turbo","deployment":"gpt-35-turbo"}}],"followup_questions":null},"sessionState":null}
{"delta":{"content":"The capital of France is Paris. [Benefit_Options-2.pdf].","role":"assistant"},"context":null,"sessionState":null}
Original file line number Diff line number Diff line change
Expand Up @@ -52,5 +52,5 @@
],
"followup_questions": null
},
"session_state": null
"sessionState": null
}
Loading
Loading