Skip to content

Commit 64cf758

Browse files
authored
Merge pull request rohit-ganguly#5 from rohit-ganguly/newdata
Add new data from Rohit's branch
2 parents 22f69f7 + 56effc6 commit 64cf758

File tree

10 files changed

+361506
-90515
lines changed

10 files changed

+361506
-90515
lines changed

.devcontainer/devcontainer.json

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,7 @@
3131
"ms-python.vscode-pylance",
3232
"ms-python.vscode-python-envs",
3333
"charliermarsh.ruff",
34-
"mtxr.sqltools",
35-
"mtxr.sqltools-driver-pg",
34+
// TODO: Add PostgreSQL extension once it's in the marketplace
3635
"esbenp.prettier-vscode",
3736
"mechatroner.rainbow-csv",
3837
"ms-vscode.vscode-node-azure-pack",

.vscode/settings.json

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,20 @@
11
{
2-
"sqltools.connections": [
2+
"pgsql.connections": [
33
{
4-
"previewLimit": 50,
4+
"id": "92CC1089-BAD0-44A4-B071-A50A6EC12B67",
5+
"groupId": "F3347CD6-9995-4EE9-8D98-D88DB010FA5B",
6+
"authenticationType": "SqlLogin",
7+
"connectTimeout": 15,
8+
"applicationName": "vscode-pgsql",
9+
"clientEncoding": "utf8",
10+
"sslmode": "prefer",
511
"server": "localhost",
6-
"port": 5432,
7-
"driver": "PostgreSQL",
8-
"name": "local",
12+
"user": "admin",
13+
"password": "",
14+
"savePassword": true,
915
"database": "postgres",
10-
"username": "admin",
11-
"password": "postgres"
12-
},
13-
{
14-
"name": "Azure database",
15-
"driver": "PostgreSQL",
16-
"server": "<HOSTNAME>.postgres.database.azure.com",
17-
"port": 5432,
18-
"database": "postgres",
19-
"username": "<USERNAME>",
20-
"askForPassword": true,
21-
"pgOptions": {
22-
"ssl": true
23-
}
16+
"profileName": "local-pg",
17+
"expiresOn": 0
2418
}
2519
],
2620
"python.testing.pytestArgs": [

convert_csv_json.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import json
44

55
# Read CSV file - Using the correct dialect to handle quotes properly
6-
with open("data.csv", encoding="utf-8") as csv_file:
6+
with open("pittsburgh_restaurants.csv", encoding="utf-8") as csv_file:
77
# Use the csv.reader with proper quoting parameters
88
csv_reader = csv.reader(csv_file, quoting=csv.QUOTE_ALL, doublequote=True, escapechar="\\")
99
header = next(csv_reader) # Get the header row
@@ -42,10 +42,17 @@
4242
item[header[i]] = value
4343
# Remove columns that are not included in the JSON output
4444
del item["is_open"]
45+
del item["hours"]
46+
del item["neighborhood"]
47+
del item["tags"]
48+
del item["vibe"]
49+
del item["top_reviews"]
50+
if item["price_level"] == "":
51+
item["price_level"] = 2.0 # Assume 2.0 if empty
4552
json_data.append(item)
4653

4754
# Write to JSON file
48-
with open("data.json", "w", encoding="utf-8") as f:
55+
with open("src/backend/fastapi_app/seed_data.json", "w", encoding="utf-8") as f:
4956
json.dump(json_data, f, indent=4, ensure_ascii=False)
5057

5158
print(f"Successfully converted CSV data to JSON format with {len(json_data)} records")

pittsburgh_restaurants.csv

Lines changed: 201 additions & 0 deletions
Large diffs are not rendered by default.

src/backend/fastapi_app/api_models.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,22 +42,17 @@ class ChatRequest(BaseModel):
4242

4343

4444
class ItemPublic(BaseModel):
45-
id: int
45+
id: str
4646
name: str
47-
location: str
4847
cuisine: str
4948
rating: int
5049
price_level: int
5150
review_count: int
52-
hours: str
53-
tags: list[str]
5451
description: str
5552
menu_summary: str
56-
top_reviews: str
57-
vibe: str
5853

5954
def to_str_for_rag(self):
60-
return f"Name:{self.name} Description:{self.description} Location:{self.location} Cuisine:{self.cuisine} Rating:{self.rating} Price Level:{self.price_level} Review Count:{self.review_count} Hours:{self.hours} Tags:{self.tags} Menu Summary:{self.menu_summary} Top Reviews:{self.top_reviews} Vibe:{self.vibe}" # noqa: E501
55+
return f"Name:{self.name} Description:{self.description} Cuisine:{self.cuisine} Rating:{self.rating} Price Level:{self.price_level} Review Count:{self.review_count} Menu Summary:{self.menu_summary}" # noqa: E501
6156

6257

6358
class ItemWithDistance(ItemPublic):
@@ -75,7 +70,7 @@ class ThoughtStep(BaseModel):
7570

7671

7772
class RAGContext(BaseModel):
78-
data_points: dict[int, ItemPublic]
73+
data_points: dict[str, ItemPublic]
7974
thoughts: list[ThoughtStep]
8075
followup_questions: Optional[list[str]] = None
8176

src/backend/fastapi_app/postgres_models.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
from __future__ import annotations
22

33
from pgvector.sqlalchemy import Vector
4-
from sqlalchemy import VARCHAR, Index
5-
from sqlalchemy.dialects import postgresql
4+
from sqlalchemy import Index
65
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
76

87

@@ -13,19 +12,14 @@ class Base(DeclarativeBase):
1312

1413
class Item(Base):
1514
__tablename__ = "items"
16-
id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
15+
id: Mapped[str] = mapped_column(primary_key=True)
1716
name: Mapped[str] = mapped_column()
18-
location: Mapped[str] = mapped_column()
1917
cuisine: Mapped[str] = mapped_column()
2018
rating: Mapped[int] = mapped_column()
2119
price_level: Mapped[int] = mapped_column()
2220
review_count: Mapped[int] = mapped_column()
23-
hours: Mapped[str] = mapped_column()
24-
tags: Mapped[list[str]] = mapped_column(postgresql.ARRAY(VARCHAR)) # Array of strings
2521
description: Mapped[str] = mapped_column()
2622
menu_summary: Mapped[str] = mapped_column()
27-
top_reviews: Mapped[str] = mapped_column()
28-
vibe: Mapped[str] = mapped_column()
2923

3024
# Embeddings for different models:
3125
embedding_3l: Mapped[Vector] = mapped_column(Vector(1024), nullable=True) # text-embedding-3-large
@@ -42,10 +36,10 @@ def to_dict(self, include_embedding: bool = False):
4236
return model_dict
4337

4438
def to_str_for_rag(self):
45-
return f"Name:{self.name} Description:{self.description} Location:{self.location} Cuisine:{self.cuisine} Rating:{self.rating} Price Level:{self.price_level} Review Count:{self.review_count} Hours:{self.hours} Tags:{self.tags} Menu Summary:{self.menu_summary} Top Reviews:{self.top_reviews} Vibe:{self.vibe}" # noqa: E501
39+
return f"Name:{self.name} Description:{self.description} Cuisine:{self.cuisine} Rating:{self.rating} Price Level:{self.price_level} Review Count:{self.review_count} Menu Summary:{self.menu_summary}" # noqa: E501
4640

4741
def to_str_for_embedding(self):
48-
return f"Name: {self.name} Description: {self.description} Cuisine: {self.cuisine} Tags: {self.tags} Menu Summary: {self.menu_summary} Top Reviews: {self.top_reviews} Vibe: {self.vibe}" # noqa: E501
42+
return f"Name: {self.name} Description: {self.description} Cuisine: {self.cuisine} Menu Summary: {self.menu_summary}" # noqa: E501
4943

5044

5145
"""

src/backend/fastapi_app/rag_advanced.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,8 @@ async def prepare_context(self) -> tuple[list[ItemPublic], list[ThoughtStep]]:
119119
most_recent_response = run_results.new_items[-1]
120120
if isinstance(most_recent_response, ToolCallOutputItem):
121121
search_results = most_recent_response.output
122+
if not isinstance(search_results, SearchResults):
123+
raise ValueError(f"Error retrieving search results: {search_results}")
122124
else:
123125
raise ValueError("Error retrieving search results, model did not call tool properly")
124126

0 commit comments

Comments
 (0)