Skip to content

Commit d39317b

Browse files
feat: update typesense search index on rfc pub/update (#10575)
* chore: typesense API config for k8s
* feat: DocumentInfo.pub_datetime() helper
* chore(deps): install typesense library
* feat: searchindex (typesense) util module
* feat: sanitize abstract
* feat: add (sanitized) content
* style: ruff ruff on doc/tasks.py
* feat: search index update task
* chore: call the update task
* refactor: better settings management
* ci: update prod settings
* chore: typing
* test: searchindex tests
* test: searchindex task test
* style: ruff ruff
* chore: drop type hints to fix mypy errors
* test: fix tests
* test: improve coverage
* fix: handle missing content blob correctly
1 parent b08945a commit d39317b

11 files changed

Lines changed: 451 additions & 50 deletions

ietf/api/serializers_rpc.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright The IETF Trust 2025, All Rights Reserved
1+
# Copyright The IETF Trust 2025-2026, All Rights Reserved
22
import datetime
33
from pathlib import Path
44
from typing import Literal, Optional
@@ -20,6 +20,7 @@
2020
RfcAuthor,
2121
)
2222
from ietf.doc.serializers import RfcAuthorSerializer
23+
from ietf.doc.tasks import update_rfc_searchindex_task
2324
from ietf.doc.utils import (
2425
default_consensus,
2526
prettify_std_name,
@@ -682,6 +683,8 @@ def update(self, instance, validated_data):
682683
stale_subseries_relations.delete()
683684
if len(rfc_events) > 0:
684685
rfc.save_with_history(rfc_events)
686+
687+
update_rfc_searchindex_task.delay(rfc.rfc_number)
685688
return rfc
686689

687690

ietf/api/tests_serializers_rpc.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
# Copyright The IETF Trust 2026, All Rights Reserved
2+
from unittest import mock
3+
24
from django.utils import timezone
35

46
from ietf.utils.test_utils import TestCase
@@ -32,7 +34,8 @@ def test_create(self):
3234
with self.assertRaises(RuntimeError, msg="serializer does not allow create()"):
3335
serializer.save()
3436

35-
def test_update(self):
37+
@mock.patch("ietf.api.serializers_rpc.update_rfc_searchindex_task")
38+
def test_update(self, mock_update_searchindex_task):
3639
rfc = WgRfcFactory(pages=10)
3740
serializer = EditableRfcSerializer(
3841
instance=rfc,
@@ -56,6 +59,11 @@ def test_update(self):
5659
)
5760
self.assertTrue(serializer.is_valid())
5861
result = serializer.save()
62+
self.assertTrue(mock_update_searchindex_task.delay.called)
63+
self.assertEqual(
64+
mock_update_searchindex_task.delay.call_args,
65+
mock.call(rfc.rfc_number),
66+
)
5967
result.refresh_from_db()
6068
self.assertEqual(result.title, "Yadda yadda yadda")
6169
self.assertEqual(
@@ -84,7 +92,8 @@ def test_update(self):
8492
[Document.objects.get(name="fyi999")],
8593
)
8694

87-
def test_partial_update(self):
95+
@mock.patch("ietf.api.serializers_rpc.update_rfc_searchindex_task")
96+
def test_partial_update(self, mock_update_searchindex_task):
8897
# We could test other permutations of fields, but authors is a partial update
8998
# we know we are going to use, so verifying that one in particular.
9099
rfc = WgRfcFactory(pages=10, abstract="do or do not", title="padawan")
@@ -104,6 +113,11 @@ def test_partial_update(self):
104113
)
105114
self.assertTrue(serializer.is_valid())
106115
result = serializer.save()
116+
self.assertTrue(mock_update_searchindex_task.delay.called)
117+
self.assertEqual(
118+
mock_update_searchindex_task.delay.call_args,
119+
mock.call(rfc.rfc_number),
120+
)
107121
result.refresh_from_db()
108122
self.assertEqual(rfc.title, "padawan")
109123
self.assertEqual(

ietf/api/tests_views_rpc.py

Lines changed: 28 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,8 @@ def test_notify_rfc_published(self, mock_task_delay):
196196
self.assertEqual(mock_kwargs["rfc_number_list"], expected_rfc_number_list)
197197

198198
@override_settings(APP_API_TOKENS={"ietf.api.views_rpc": ["valid-token"]})
199-
def test_upload_rfc_files(self):
199+
@mock.patch("ietf.api.views_rpc.update_rfc_searchindex_task")
200+
def test_upload_rfc_files(self, mock_update_searchindex_task):
200201
def _valid_post_data():
201202
"""Generate a valid post data dict
202203
@@ -217,14 +218,7 @@ def _valid_post_data():
217218
}
218219

219220
url = urlreverse("ietf.api.purple_api.upload_rfc_files")
220-
unused_rfc_number = (
221-
Document.objects.filter(rfc_number__isnull=False).aggregate(
222-
unused_rfc_number=Max("rfc_number") + 1
223-
)["unused_rfc_number"]
224-
or 10000
225-
)
226-
227-
rfc = WgRfcFactory(rfc_number=unused_rfc_number)
221+
rfc = WgRfcFactory()
228222
assert isinstance(rfc, Document), "WgRfcFactory should generate a Document"
229223
with TemporaryDirectory() as rfc_dir:
230224
settings.RFC_PATH = rfc_dir # affects overridden settings
@@ -236,15 +230,17 @@ def _valid_post_data():
236230
# no api key
237231
r = self.client.post(url, _valid_post_data(), format="multipart")
238232
self.assertEqual(r.status_code, 403)
233+
self.assertFalse(mock_update_searchindex_task.delay.called)
239234

240235
# invalid RFC
241236
r = self.client.post(
242237
url,
243-
_valid_post_data() | {"rfc": unused_rfc_number + 1},
238+
_valid_post_data() | {"rfc": rfc.rfc_number + 10},
244239
format="multipart",
245240
headers={"X-Api-Key": "valid-token"},
246241
)
247242
self.assertEqual(r.status_code, 400)
243+
self.assertFalse(mock_update_searchindex_task.delay.called)
248244

249245
# empty files
250246
r = self.client.post(
@@ -263,6 +259,7 @@ def _valid_post_data():
263259
headers={"X-Api-Key": "valid-token"},
264260
)
265261
self.assertEqual(r.status_code, 400)
262+
self.assertFalse(mock_update_searchindex_task.delay.called)
266263

267264
# bad file type
268265
r = self.client.post(
@@ -276,9 +273,10 @@ def _valid_post_data():
276273
headers={"X-Api-Key": "valid-token"},
277274
)
278275
self.assertEqual(r.status_code, 400)
276+
self.assertFalse(mock_update_searchindex_task.delay.called)
279277

280278
# Put a file in the way. Post should fail because replace = False
281-
file_in_the_way = (rfc_path / f"rfc{unused_rfc_number}.txt")
279+
file_in_the_way = (rfc_path / f"{rfc.name}.txt")
282280
file_in_the_way.touch()
283281
r = self.client.post(
284282
url,
@@ -287,11 +285,12 @@ def _valid_post_data():
287285
headers={"X-Api-Key": "valid-token"},
288286
)
289287
self.assertEqual(r.status_code, 409) # conflict
288+
self.assertFalse(mock_update_searchindex_task.delay.called)
290289
file_in_the_way.unlink()
291290

292291
# Put a blob in the way. Post should fail because replace = False
293292
blob_in_the_way = Blob.objects.create(
294-
bucket="rfc", name=f"txt/rfc{unused_rfc_number}.txt", content=b""
293+
bucket="rfc", name=f"txt/{rfc.name}.txt", content=b""
295294
)
296295
r = self.client.post(
297296
url,
@@ -300,6 +299,7 @@ def _valid_post_data():
300299
headers={"X-Api-Key": "valid-token"},
301300
)
302301
self.assertEqual(r.status_code, 409) # conflict
302+
self.assertFalse(mock_update_searchindex_task.delay.called)
303303
blob_in_the_way.delete()
304304

305305
# valid post
@@ -310,8 +310,13 @@ def _valid_post_data():
310310
headers={"X-Api-Key": "valid-token"},
311311
)
312312
self.assertEqual(r.status_code, 200)
313+
self.assertTrue(mock_update_searchindex_task.delay.called)
314+
self.assertEqual(
315+
mock_update_searchindex_task.delay.call_args,
316+
mock.call(rfc.rfc_number),
317+
)
313318
for extension in ["xml", "txt", "html", "pdf", "json"]:
314-
filename = f"rfc{unused_rfc_number}.{extension}"
319+
filename = f"{rfc.name}.{extension}"
315320
self.assertEqual(
316321
(rfc_path / filename)
317322
.read_text(),
@@ -328,7 +333,7 @@ def _valid_post_data():
328333
f"{extension} blob should contain the expected content",
329334
)
330335
# special case for notprepped
331-
notprepped_fn = f"rfc{unused_rfc_number}.notprepped.xml"
336+
notprepped_fn = f"{rfc.name}.notprepped.xml"
332337
self.assertEqual(
333338
(
334339
rfc_path / "prerelease" / notprepped_fn
@@ -347,22 +352,29 @@ def _valid_post_data():
347352
)
348353

349354
# re-post with replace = False should now fail
355+
mock_update_searchindex_task.reset_mock()
350356
r = self.client.post(
351357
url,
352358
_valid_post_data(),
353359
format="multipart",
354360
headers={"X-Api-Key": "valid-token"},
355361
)
356362
self.assertEqual(r.status_code, 409) # conflict
357-
363+
self.assertFalse(mock_update_searchindex_task.delay.called)
364+
358365
# re-post with replace = True should succeed
359366
r = self.client.post(
360367
url,
361368
_valid_post_data() | {"replace": True},
362369
format="multipart",
363370
headers={"X-Api-Key": "valid-token"},
364371
)
365-
self.assertEqual(r.status_code, 200) # conflict
372+
self.assertEqual(r.status_code, 200)
373+
self.assertTrue(mock_update_searchindex_task.delay.called)
374+
self.assertEqual(
375+
mock_update_searchindex_task.delay.call_args,
376+
mock.call(rfc.rfc_number),
377+
)
366378

367379
@override_settings(APP_API_TOKENS={"ietf.api.views_rpc": ["valid-token"]})
368380
@mock.patch("ietf.api.views_rpc.create_rfc_index_task")

ietf/api/views_rpc.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
from ietf.doc.models import Document, DocHistory, RfcAuthor, DocEvent
3939
from ietf.doc.serializers import RfcAuthorSerializer
4040
from ietf.doc.storage_utils import remove_from_storage, store_file, exists_in_storage
41-
from ietf.doc.tasks import signal_update_rfc_metadata_task
41+
from ietf.doc.tasks import signal_update_rfc_metadata_task, update_rfc_searchindex_task
4242
from ietf.person.models import Email, Person
4343
from ietf.sync.tasks import create_rfc_index_task
4444

@@ -516,6 +516,7 @@ def post(self, request):
516516
destination.parent.mkdir()
517517
shutil.move(ftm, destination)
518518

519+
update_rfc_searchindex_task.delay(rfc.rfc_number)
519520
return Response(NotificationAckSerializer().data)
520521

521522

ietf/doc/models.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1285,19 +1285,24 @@ def submission(self):
12851285
s = s.first()
12861286
return s
12871287

1288-
def pub_date(self):
1289-
"""Get the publication date for this document
1290-
1291-
This is the rfc publication date for RFCs, and the new-revision date for other documents.
1292-
"""
1288+
def pub_datetime(self):
1289+
"""Get the publication datetime of this document"""
12931290
if self.type_id == "rfc":
12941291
# As of Sept 2022, in ietf.sync.rfceditor.update_docs_from_rfc_index() `published_rfc` events are
12951292
# created with a timestamp whose date *in the PST8PDT timezone* is the official publication date
12961293
# assigned by the RFC editor.
12971294
event = self.latest_event(type='published_rfc')
12981295
else:
12991296
event = self.latest_event(type='new_revision')
1300-
return event.time.astimezone(RPC_TZINFO).date() if event else None
1297+
return event.time.astimezone(RPC_TZINFO) if event else None
1298+
1299+
def pub_date(self):
1300+
"""Get the publication date for this document
1301+
1302+
This is the rfc publication date for RFCs, and the new-revision date for other documents.
1303+
"""
1304+
pub_datetime = self.pub_datetime()
1305+
return None if pub_datetime is None else pub_datetime.date()
13011306

13021307
def is_dochistory(self):
13031308
return False

ietf/doc/tasks.py

Lines changed: 39 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# Celery task definitions
44
#
55
import datetime
6+
67
import debug # pyflakes:ignore
78

89
from celery import shared_task
@@ -11,7 +12,7 @@
1112
from django.conf import settings
1213
from django.utils import timezone
1314

14-
from ietf.utils import log
15+
from ietf.utils import log, searchindex
1516
from ietf.utils.timezone import datetime_today
1617

1718
from .expire import (
@@ -77,17 +78,19 @@ def expire_last_calls_task():
7778
try:
7879
expire_last_call(doc)
7980
except Exception:
80-
log.log(f"ERROR: Failed to expire last call for {doc.file_tag()} (id={doc.pk})")
81+
log.log(
82+
f"ERROR: Failed to expire last call for {doc.file_tag()} (id={doc.pk})"
83+
)
8184
else:
8285
log.log(f"Expired last call for {doc.file_tag()} (id={doc.pk})")
8386

8487

85-
@shared_task
88+
@shared_task
8689
def generate_idnits2_rfc_status_task():
8790
outpath = Path(settings.DERIVED_DIR) / "idnits2-rfc-status"
8891
blob = generate_idnits2_rfc_status()
8992
try:
90-
outpath.write_text(blob, encoding="utf8") # TODO-BLOBSTORE
93+
outpath.write_text(blob, encoding="utf8") # TODO-BLOBSTORE
9194
except Exception as e:
9295
log.log(f"failed to write idnits2-rfc-status: {e}")
9396

@@ -97,15 +100,15 @@ def generate_idnits2_rfcs_obsoleted_task():
97100
outpath = Path(settings.DERIVED_DIR) / "idnits2-rfcs-obsoleted"
98101
blob = generate_idnits2_rfcs_obsoleted()
99102
try:
100-
outpath.write_text(blob, encoding="utf8") # TODO-BLOBSTORE
103+
outpath.write_text(blob, encoding="utf8") # TODO-BLOBSTORE
101104
except Exception as e:
102105
log.log(f"failed to write idnits2-rfcs-obsoleted: {e}")
103106

104107

105108
@shared_task
106109
def generate_draft_bibxml_files_task(days=7, process_all=False):
107110
"""Generate bibxml files for recently updated docs
108-
111+
109112
If process_all is False (the default), processes only docs with new revisions
110113
in the last specified number of days.
111114
"""
@@ -117,7 +120,9 @@ def generate_draft_bibxml_files_task(days=7, process_all=False):
117120
doc__type_id="draft",
118121
).order_by("time")
119122
if not process_all:
120-
doc_events = doc_events.filter(time__gte=timezone.now() - datetime.timedelta(days=days))
123+
doc_events = doc_events.filter(
124+
time__gte=timezone.now() - datetime.timedelta(days=days)
125+
)
121126
for event in doc_events:
122127
try:
123128
update_or_create_draft_bibxml_file(event.doc, event.rev)
@@ -132,6 +137,7 @@ def investigate_fragment_task(name_fragment: str):
132137
"results": investigate_fragment(name_fragment),
133138
}
134139

140+
135141
@shared_task
136142
def rebuild_reference_relations_task(doc_names: list[str]):
137143
log.log(f"Task: Rebuilding reference relations for {doc_names}")
@@ -157,6 +163,32 @@ def rebuild_reference_relations_task(doc_names: list[str]):
157163
def fixup_bofreq_timestamps_task(): # pragma: nocover
158164
fixup_bofreq_timestamps()
159165

166+
160167
# Celery task wrapper: hands rfc_number_list (an empty tuple when the caller
# supplies nothing) straight to signal_update_rfc_metadata() and returns nothing.
@shared_task
161168
def signal_update_rfc_metadata_task(rfc_number_list=()):
162169
signal_update_rfc_metadata(rfc_number_list)
170+
171+
172+
@shared_task(bind=True)
def update_rfc_searchindex_task(self, rfc_number: int):
    """Push the current state of a single RFC into the search index.

    Only logs and returns when search indexing is disabled, or when no
    Document of type "rfc" carries the given ``rfc_number``.

    A failed index update is always logged. If the exception is an instance of
    ``searchindex.RETRYABLE_ERROR_CLASSES`` the task requests a retry, using
    the ``TASK_RETRY_DELAY`` and ``TASK_MAX_RETRIES`` values returned by
    ``searchindex.get_settings()``. Any other exception is swallowed after
    being logged.
    """
    if not searchindex.enabled():
        log.log("Search indexing is not enabled, skipping")
        return

    matching_rfcs = Document.objects.filter(type_id="rfc", rfc_number=rfc_number)
    rfc = matching_rfcs.first()
    if rfc is None:
        log.log(
            f"ERROR: Document for rfc{rfc_number} not found, not updating search index"
        )
        return

    try:
        searchindex.update_or_create_rfc_entry(rfc)
    except Exception as err:
        log.log(f"Search index update for {rfc.name} failed ({err})")
        if not isinstance(err, searchindex.RETRYABLE_ERROR_CLASSES):
            # Not a retryable failure: it has been logged, nothing more to do.
            return
        # Deliberately still inside the handler, so retry() is invoked while
        # the failure is the active exception, exactly as before.
        retry_config = searchindex.get_settings()
        self.retry(
            countdown=retry_config["TASK_RETRY_DELAY"],
            max_retries=retry_config["TASK_MAX_RETRIES"],
        )

0 commit comments

Comments
 (0)