Skip to content

Commit fb07c64

Browse files
authored
add metadata to products for LLM use (#6717)
1 parent 9befbcd commit fb07c64

File tree

6 files changed

+320
-16
lines changed

6 files changed

+320
-16
lines changed

kitsune/llm/questions/classifiers.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,9 @@ def handle_spam(payload: dict[str, Any], spam_result: dict[str, Any]) -> dict[st
6262
if not ((action == ModerationAction.SPAM) and spam_result.get("maybe_misclassified")):
6363
return {"action": action, "product_result": {}}
6464

65-
payload["products"] = get_products(only_with_forums=True, output_format="JSON")
65+
payload["products"] = get_products(
66+
only_with_forums=True, include_metadata=["description"], output_format="JSON"
67+
)
6668
product_result = product_classification_chain.invoke(payload)
6769
new_product = product_result.get("product")
6870

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Generated by Django 4.2.22 on 2025-06-10 17:00
2+
3+
from django.db import migrations, models
4+
import kitsune.sumo.utils
5+
6+
7+
class Migration(migrations.Migration):
    """Schema migration that adds a JSON ``metadata`` field to the ``product`` model."""

    # Must run after the previous migration of the "products" app.
    dependencies = [
        ("products", "0024_auto_20250520_1423"),
    ]

    operations = [
        migrations.AddField(
            model_name="product",
            name="metadata",
            field=models.JSONField(
                # Passing the ``dict`` callable (not a ``{}`` literal) gives each
                # row its own fresh empty dict as the default value.
                default=dict,
                encoder=kitsune.sumo.utils.PrettyJSONEncoder,
                help_text="Data useful for things like LLM prompts.",
            ),
        ),
    ]
Lines changed: 218 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,218 @@
1+
from django.db import migrations
2+
3+
4+
# Initial metadata to attach to existing products. Each entry pairs a product
# "title" (used to look the product up) with the "metadata" dict that is stored
# on that product verbatim. Every entry here carries a single "description" key.
# The descriptions are built from adjacent string literals; each continuation
# fragment begins with a space so the pieces join into normal prose.
PRODUCTS_WITH_METADATA = [
    {
        "title": "Firefox",
        "metadata": {
            "description": (
                "Firefox is a free and open-source web browser. It is available on Windows,"
                " macOS, and Linux. It offers many features including private browsing mode,"
                " tracking protection, a Sync feature for synchronizing bookmarks, passwords,"
                " tabs, and more across all of your devices, a free password manager, a"
                " bookmark manager for organizing your bookmarks with folders and tags, themes"
                " for customizing its look, pinned tabs, choice of vertical or horizontal tabs,"
                " a PDF editor and viewer, website translation, picture-in-picture mode, an"
                " eyedropper tool that identifies and captures colors on the web, and many"
                " add-ons and extensions."
            ),
        },
    },
    {
        "title": "Firefox for Android",
        "metadata": {
            "description": (
                "Firefox for Android is a free and open-source web browser that provides a"
                " mobile-friendly interface optimized for Android devices."
            ),
        },
    },
    {
        "title": "Firefox for iOS",
        "metadata": {
            "description": (
                "Firefox for iOS is a free and open-source web browser that provides a"
                " mobile-friendly interface optimized for iOS devices."
            ),
        },
    },
    {
        "title": "Mozilla VPN",
        "metadata": {
            "description": (
                "Mozilla VPN is a virtual private network service that protects your online"
                " privacy and security by masking your IP address, hiding your location data,"
                " and encrypting your network activity. It is supported on Windows, macOS,"
                " Android, iOS, and Linux operating systems. It has no bandwidth restrictions"
                " or throttling, uses the fast and secure Wireguard VPN protocol, supports"
                ' custom DNS, and offers a "Multi-Hop" feature that allows you to encrypt'
                " and route your internet traffic through two server locations rather than"
                " just one."
            ),
        },
    },
    {
        "title": "Firefox Relay",
        "metadata": {
            "description": (
                "Firefox Relay is a service that lets you use an email and/or phone alias"
                " instead of your real email and/or phone number when filling out online"
                " forms or signing up for services. An alias forwards your incoming emails"
                " or phone calls to your real email address or phone number. This helps"
                " protect your privacy and reduces spam by keeping your real email address"
                " or phone number hidden. You can manage your aliases, including blocking or"
                " deleting them. Firefox Relay offers a free service with a limited number"
                " of email aliases, and also a paid Premium service that offers more features"
                " like phone aliases, unlimited email aliases, and support. Relay is also"
                " available as a Firefox extension, making it easy to integrate into your web"
                " browsing experience."
            ),
        },
    },
    {
        "title": "Mozilla Monitor",
        "metadata": {
            "description": (
                "Mozilla Monitor is a service that helps users protect their online identity"
                " and data. It provides both a free service and a subscription-based service"
                " called Monitor Plus. The core function of Mozilla Monitor is to alert users"
                " about potential data breaches and data broker exposures, offering guidance"
                " on how to mitigate risks and remove personal information from data broker"
                " websites. It scans a database of known data breaches and notifies users if"
                " their email addresses or other information have been compromised. It also"
                " allows users to scan their personal information against a list of data"
                " brokers to see if it has been exposed. Monitor Plus, the subscription-based"
                " service, provides continuous monitoring for data breaches and data broker"
                " exposures, alerts users to new vulnerabilities, and automatically removes"
                " personal information from data broker websites."
            ),
        },
    },
    {
        "title": "MDN Plus",
        "metadata": {
            "description": (
                "MDN Plus is a subscription service built on top of the Mozilla Developer"
                " Network (MDN), offering premium features like notifications, collections,"
                " and offline access for a more personalized and productive web development"
                " experience. It allows users to customize their MDN Web Docs experience,"
                " including managing collections, subscribing to updates, and using MDN"
                " offline. Collections allow premium users to organize their favorite MDN"
                " content in curated collections for easy access. Notifications allow premium"
                " users to stay informed about changes to web platform technologies by"
                " subscribing to updates in the content they care about. Offline access allows"
                " premium users complete access to MDN Web Docs without an internet connection."
            ),
        },
    },
    {
        "title": "Firefox Focus",
        "metadata": {
            "description": (
                "Firefox Focus is a privacy-focused web browser designed for mobile devices"
                " (Android and iOS). Firefox Focus is designed to block online trackers,"
                " including third-party advertising, to protect users' privacy. It has a simple"
                " interface with just one tab, no bookmarks, and no saved history. When you're"
                " finished browsing, you can easily clear your session and all associated data"
                " (history, passwords, cookies) with a tap. By blocking trackers and ads, Firefox"
                " Focus helps pages load faster and reduces data usage."
            ),
        },
    },
    {
        "title": "Firefox for Enterprise",
        "metadata": {
            "description": (
                "Firefox for Enterprise is a web browser solution tailored for businesses and"
                " organizations that need robust deployment, management, and extended support."
                " It provides tools like MSI installers (for Windows), Group Policy support"
                " (Windows ADM/ADMX templates), and configuration files (like policies.json"
                " and MacOS plist files) to facilitate large-scale deployment and customization"
                " within organizations. This allows administrators to set browser policies,"
                " manage add-ons, and customize installations across multiple computers. It"
                " provides a choice between two different release channels, Rapid Release and"
                " Extended Support Release (ESR). The Rapid Release channel aligns with the"
                " standard Firefox release cycle and offers the latest features, while the ESR"
                " channel offers a longer support cycle, providing greater stability and fewer"
                " feature updates, which is beneficial for organizations that prioritize"
                " consistent browser environments and minimizing update-related compatibility"
                " issues. In essence, Firefox for Enterprise is not a separate browser, but"
                " rather the Firefox browser bundled with specific tools and support options"
                " designed to meet the needs of businesses and organizations, particularly with"
                " regard to deployment, management, and release cadence control."
            ),
        },
    },
    {
        "title": "Thunderbird",
        "metadata": {
            "description": (
                "Thunderbird is a free and open-source email client that allows users to manage"
                " multiple email accounts in one place. It's available for Windows, macOS, and"
                " Linux. Thunderbird serves as a central hub for managing multiple email"
                " accounts, supporting various providers like Gmail, Outlook, Yahoo, and more."
                " You can consolidate all your incoming messages into a single, unified inbox"
                " for easy viewing, while still identifying the account each email belongs to"
                " through color-coding. Thunderbird offers various features to help you manage"
                " your emails, including quick search, saved search folders (virtual folders),"
                " advanced filtering, message grouping, and tags. It also offers an adaptive"
                " filter that learns from your actions which types of messages are legitimate"
                " and which are junk, to help manage unwanted emails. Unlike web-based email,"
                " Thunderbird stores emails locally on your computer, allowing you to access"
                " and search them even without an internet connection. It adheres to industry"
                " standards for email, including the POP and IMAP protocols."
            ),
        },
    },
    {
        "title": "Thunderbird for Android",
        "metadata": {
            "description": (
                "Thunderbird for Android is a free and open-source email client for Android"
                " mobile devices. It provides a mobile-friendly interface optimized for Android"
                " devices that allows users to manage multiple email accounts in one place,"
                " supporting various providers like Gmail, Outlook, Yahoo, and more. You can"
                " consolidate all your incoming messages into a single, unified inbox"
                " for easy viewing, while still identifying the account each email belongs to"
                " through color-coding. It offers various features to help you manage your"
                " emails, including quick search, saved search folders (virtual folders),"
                " advanced filtering, message grouping, and tags. It also offers an adaptive"
                " filter that learns from your actions which types of messages are legitimate"
                " and which are junk, to help manage unwanted emails. Unlike web-based email,"
                " Thunderbird stores emails locally on your device, allowing you to access"
                " and search them even without an internet connection. It adheres to industry"
                " standards for email, including the POP and IMAP protocols."
            ),
        },
    },
]
189+
190+
191+
def add_initial_product_metadata(apps, schema_editor):
    """Store the seed metadata on each matching, non-archived product.

    For every entry in ``PRODUCTS_WITH_METADATA`` the product is looked up by
    exact title among non-archived products. When exactly one product matches,
    its ``metadata`` is replaced with the entry's metadata and the product is
    saved. When none or several match, a message is printed and the entry is
    skipped, so the migration never fails on a missing or ambiguous title.

    Args:
        apps: Migration app registry, used to obtain the historical
            ``Product`` model.
        schema_editor: Supplied by ``RunPython``; not used here.
    """
    product_model = apps.get_model("products", "Product")

    for entry in PRODUCTS_WITH_METADATA:
        title = entry["title"]
        try:
            match = product_model.objects.get(title=title, is_archived=False)
        except product_model.DoesNotExist:
            print(f"""Skipped product "{title}" because it does not exist.""")
        except product_model.MultipleObjectsReturned:
            print(f"""Skipped product "{title}" because it has multiple objects.""")
        else:
            # Only reached when the lookup found exactly one product.
            match.metadata = entry["metadata"]
            match.save()
208+
209+
210+
class Migration(migrations.Migration):
    """Data migration that runs ``add_initial_product_metadata`` on existing products."""

    # The ``metadata`` column must exist before it can be populated.
    dependencies = [
        ("products", "0025_product_metadata"),
    ]

    operations = [
        # The reverse operation is a no-op: rolling back this migration does
        # not touch the stored metadata values.
        migrations.RunPython(add_initial_product_metadata, migrations.RunPython.noop),
    ]

kitsune/products/models.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,11 @@ class BaseProductTopic(ModelBase):
2323
is_archived = models.BooleanField(default=False)
2424
# Dictates the display order in lists
2525
display_order = models.IntegerField()
26+
metadata = models.JSONField(
27+
default=dict,
28+
encoder=PrettyJSONEncoder,
29+
help_text=_lazy("Data useful for things like LLM prompts."),
30+
)
2631

2732
class Meta:
2833
abstract = True
@@ -113,12 +118,6 @@ class Topic(BaseProductTopic):
113118
default=False, help_text=_lazy("Whether this topic is shown in navigation menus.")
114119
)
115120

116-
metadata = models.JSONField(
117-
default=dict,
118-
encoder=PrettyJSONEncoder,
119-
help_text=_lazy("Data useful for things like LLM prompts."),
120-
)
121-
122121
class Meta(object):
123122
ordering = ["title", "display_order"]
124123

kitsune/products/tests/test_utils.py

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -414,28 +414,49 @@ class GetProductsTests(TestCase):
414414

415415
def setUp(self):
416416
ProductFactory(
417-
title="product1", description="All about product1...", display_order=1, slug="p1"
417+
title="product1",
418+
description="All about product1...",
419+
metadata={
420+
"description": "Detailed description of product1...",
421+
},
422+
display_order=1,
423+
slug="p1",
418424
)
419425
ProductFactory(
420-
title="product2", description="All about product2...", display_order=2, slug="p2"
426+
title="product2",
427+
description="All about product2...",
428+
metadata={
429+
"description": "Detailed description of product2...",
430+
},
431+
display_order=2,
432+
slug="p2",
421433
)
422434
ProductFactory(
423435
title="product3",
424436
description="All about product3...",
437+
metadata={
438+
"description": "Detailed description of product3...",
439+
},
425440
display_order=3,
426441
slug="mozilla-account",
427442
visible=False,
428443
)
429444
ProductFactory(
430445
title="product4",
431446
description="All about product4...",
447+
metadata={
448+
"description": "Detailed description of product4...",
449+
},
432450
display_order=4,
433451
slug="p4",
434452
visible=False,
435453
)
436454
ProductFactory(
437455
title="product5",
438456
description="All about product5...",
457+
metadata={
458+
"description": "Detailed description of product5...",
459+
},
439460
display_order=5,
440461
slug="p5",
441462
is_archived=True,
@@ -470,3 +491,35 @@ def test_get_products_as_json(self):
470491
]
471492
}"""
472493
self.assertEqual(get_products(output_format="JSON"), expected)
494+
495+
def test_get_products_with_metadata(self):
496+
expected = """products:
497+
- title: product1
498+
description: Detailed description of product1...
499+
- title: product2
500+
description: Detailed description of product2...
501+
- title: product3
502+
description: Detailed description of product3...
503+
"""
504+
self.assertEqual(get_products(include_metadata=["description"]), expected)
505+
506+
def test_get_products_with_metadata_as_json(self):
507+
expected = """{
508+
"products": [
509+
{
510+
"title": "product1",
511+
"description": "Detailed description of product1..."
512+
},
513+
{
514+
"title": "product2",
515+
"description": "Detailed description of product2..."
516+
},
517+
{
518+
"title": "product3",
519+
"description": "Detailed description of product3..."
520+
}
521+
]
522+
}"""
523+
self.assertEqual(
524+
get_products(include_metadata=["description"], output_format="JSON"), expected
525+
)

0 commit comments

Comments
 (0)