@@ -2054,7 +2054,7 @@ def remove_objects_from_page(
20542054 text_filters: Properties of text to be deleted, if applicable. Optional.
20552055 This is a Python dictionary with the following properties:
20562056
2057- * font_ids: List of font IDs (such as /F1 or /T1_0) to be deleted.
2057+ * font_ids: List of font resource IDs (such as /F1 or /T1_0) to be deleted.
20582058
20592059 """
20602060 if isinstance (to_delete , (list , tuple )):
@@ -2119,8 +2119,9 @@ def clean(
21192119 )
21202120 ):
21212121 if (
2122- not to_delete & ObjectDeletionFlag .TEXT
2123- or (not font_ids_to_delete or font_id in font_ids_to_delete )
2122+ not to_delete & ObjectDeletionFlag .TEXT
2123+ or (to_delete & ObjectDeletionFlag .TEXT and not text_filters )
2124+ or (to_delete & ObjectDeletionFlag .TEXT and font_id in font_ids_to_delete )
21242125 ):
21252126 del content .operations [i ]
21262127 else :
@@ -2246,16 +2247,49 @@ def remove_text(self, font_names: Optional[List[str]] = None) -> None:
22462247 font_names = []
22472248
22482249 for page in self .pages :
2249- font_ids = []
2250- fonts = page .get ("/Resources" , {}).get ("/Font" , {})
2251- for font_id , font_info in fonts .items ():
2252- font_name = font_info .get ("/BaseFont" , "" ).split ("+" )[- 1 ]
2253- if font_name in font_names :
2254- font_ids .append (font_id )
2255-
2256- text_filters = {
2257- "font_ids" : font_ids ,
2258- }
2250+ resource_ids_to_remove = []
2251+
2252+ # Content streams reference fonts and other resources with names like "/F1" or "/T1_0"
2253+ # Font names need to be converted to resource names/IDs for easier removal
2254+ if font_names :
2255+ # Recursively loop through page objects to gather font info
def get_font_info(
    obj: Any,
    font_info: Optional[Dict[str, Any]] = None,
    key: Optional[str] = None,
    _seen: Optional[Dict[int, Any]] = None,
) -> Dict[str, Any]:
    """
    Recursively collect the resource names under which fonts are registered.

    Args:
        obj: The object to inspect; indirect references are resolved first.
        font_info: Accumulator mapping a normalized font name to a dict
            with the keys "normalized_font_name" and "resource_ids".
            A new one is created when omitted.
        key: The dictionary key under which ``obj`` was found, if any.
            It is recorded as the resource ID when ``obj`` is a font.
        _seen: Internal bookkeeping of containers that were already
            descended into; callers should not pass it.

    Returns:
        The (possibly updated) ``font_info`` accumulator.
    """
    if font_info is None:
        font_info = {}
    if _seen is None:
        _seen = {}
    if isinstance(obj, IndirectObject):
        obj = obj.get_object()
    if isinstance(obj, dict):
        if obj.get("/Type") == "/Font":
            font_name = obj.get("/BaseFont", "")
            # Normalize font names like "/RRXFFV+Palatino-Bold" to "Palatino-Bold"
            normalized_font_name = font_name.lstrip("/").split("+")[-1]
            entry = font_info.setdefault(
                normalized_font_name,
                {
                    "normalized_font_name": normalized_font_name,
                    "resource_ids": [],
                },
            )
            # Fonts reached through an array (no dictionary key) have no
            # resource name, so there is nothing to record for them.
            if key is not None and key not in entry["resource_ids"]:
                entry["resource_ids"].append(key)
        # The key is recorded above *before* this check so that one font
        # object shared by several resource names still yields every name;
        # only the repeated descent is skipped.
        if id(obj) in _seen:
            return font_info
        # Keep a reference to the object so its id cannot be reused while
        # the walk is still running.
        _seen[id(obj)] = obj
        for k in obj:
            get_font_info(obj[k], font_info, k, _seen)
    elif isinstance(obj, (list, ArrayObject)):
        if id(obj) in _seen:
            return font_info
        _seen[id(obj)] = obj
        for child_obj in obj:
            get_font_info(child_obj, font_info, None, _seen)
    return font_info
2283+
2284+ # Add relevant resource names for removal
2285+ font_info = get_font_info (page .get ("/Resources" ))
2286+ for font_name in font_names :
2287+ if font_name in font_info :
2288+ resource_ids_to_remove .extend (font_info [font_name ]["resource_ids" ])
2289+
2290+ text_filters = {}
2291+ if font_names :
2292+ text_filters ["font_ids" ] = resource_ids_to_remove
22592293 self .remove_objects_from_page (page , ObjectDeletionFlag .TEXT , text_filters = text_filters )
22602294
22612295 def add_uri (
0 commit comments