From abf00e0d2f572a6f3ee753cf92c6fb7b0db912e0 Mon Sep 17 00:00:00 2001 From: Julien Sicot Date: Thu, 17 Oct 2024 17:18:05 +0200 Subject: [PATCH 01/12] add upload_file_to_workspace method --- dspace_rest_client/client.py | 57 +++++++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 11 deletions(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 2d924ef..d2743e8 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -14,15 +14,12 @@ @author Kim Shepherd """ -import code -import configparser import json import logging import os import requests from requests import Request -import urllib.parse from dotenv import load_dotenv from uuid import UUID @@ -1142,23 +1139,25 @@ def create_workflowitem(self, workspace_id): Create workflow item from workspace item ID. @param workspace_id: ID of the workspace item to create workflow item from - @return: Response from API + @return: Response from API or False in case of failure """ - params = { - "projection": "full" - } - url = f"{self.API_ENDPOINT}/workflow/workflowitems" params = None # No additional parameters for this request uri_list = f"{self.API_ENDPOINT}/submission/workspaceitems/{workspace_id}" + try: r = self.api_post_uri(url, params=params, uri_list=uri_list) r.raise_for_status() - logging.info(f"WorkflowItem created successfully from WorkspaceItem #{workspace_id}") + logging.info( + f"WorkflowItem created successfully from WorkspaceItem #{workspace_id}" + ) return r.json() except requests.RequestException as e: - logging.error(f"Failed to create WorkflowItem: {r.status_code}, {r.text}") - return False + # Log the error without raising an exception that would block execution + logging.error( + f"Failed to create WorkflowItem: {getattr(r, 'status_code', 'N/A')}, {getattr(r, 'text', 'No response text')}" + ) + return {"success": False, "error": str(e), "workspace_id": workspace_id} def import_unpaywall_fulltext(self, workspace_item_id): try: @@ -1215,3 +1214,39 @@ def import_unpaywall_fulltext(self, workspace_item_id): except requests.exceptions.RequestException as e: logging.error(f"Request failed: {e}") return False + + def upload_file_to_workspace(self, workspace_id, file_path): + """ + Upload a file to a workspace item in DSpace. + + :param workspace_id: ID of the workspace item (e.g., '241262') + :param file_path: Path to the file you want to upload + :return: The response from the API + """ + url = f"{self.API_ENDPOINT}/submission/workspaceitems/{workspace_id}" + + # Open the file in binary mode + with open(file_path, "rb") as file: + files = { + "file": (file_path, file), + } + headers = { + "accept": "*/*", + "access": self.ACCESS_TOKEN, + "Authorization": f"Bearer {self.API_TOKEN}", + } + + # Perform the request to upload the file + response = self.session.post(url, headers=headers, files=files) + + # Log the status and return the response + if response.status_code == 200 or response.status_code == 201: + logging.info( + f"File uploaded successfully to workspace item {workspace_id}" + ) + else: + logging.error( + f"Failed to upload file to workspace item {workspace_id}. Status: {response.status_code}. Response: {response.text}" + ) + + return response From 715e17fa2bee8253ca34de99ef08735d97e2382b Mon Sep 17 00:00:00 2001 From: Julien Sicot Date: Tue, 5 Nov 2024 14:08:08 +0100 Subject: [PATCH 02/12] add method to delete a workspace item --- dspace_rest_client/client.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index d2743e8..06f8cb6 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -245,7 +245,7 @@ def api_put(self, url, params, json, retry=False): return r - def api_delete(self, url, params, retry=False): + def api_delete(self, url, params=None, retry=False): """ Perform a DELETE request. Refresh XSRF token if necessary. DELETES are typically used to update objects. @@ -1250,3 +1250,29 @@ def upload_file_to_workspace(self, workspace_id, file_path): ) return response + + def delete_workspace_item(self, workspace_item_id): + """ + Deletes a workspace item in DSpace by its ID. + :param workspace_item_id: ID of the workspace item to delete. + :return: Response from the API. + """ + if not workspace_item_id: + logging.error("Workspace item ID must be provided.") + return None + + # Construct the URL for the DELETE request + url = f"{self.API_ENDPOINT}/submission/workspaceitems/{workspace_item_id}" + + # Perform the DELETE request + response = self.api_delete(url) + + # Check for successful deletion + if response.status_code == 204: + logging.info(f"Workspace item {workspace_item_id} deleted successfully.") + elif response.status_code == 404: + logging.warning(f"Workspace item {workspace_item_id} not found.") + else: + logging.error(f"Failed to delete workspace item {workspace_item_id}: {response.status_code} - {response.text}") + + return response From d1c10cbca33f153d915a9bc43cc2fb624828e8a9 Mon Sep 17 00:00:00 2001 From: Julien Sicot Date: Fri, 8 Nov 2024 08:48:50 +0100 Subject: [PATCH 03/12] add method to search autorities --- dspace_rest_client/client.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 06f8cb6..9e59012 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -1250,7 +1250,7 @@ def upload_file_to_workspace(self, workspace_id, file_path): ) return response - + def delete_workspace_item(self, workspace_item_id): """ Deletes a workspace item in DSpace by its ID. @@ -1276,3 +1276,31 @@ def delete_workspace_item(self, workspace_item_id): logging.error(f"Failed to delete workspace item {workspace_item_id}: {response.status_code} - {response.text}") return response + + def get_authority(self, authority_type="AuthorAuthority", metadata="dc.contributor.author", filter_text="", exact=False): + """ + Queries the DSpace API to retrieve author authorities based on the provided criteria. + + @param metadata: Metadata field used for the search, default is "dc.contributor.author". + @param authority_type: Filter by authority type. + @param filter_text: Filter text for author search. + @param exact: Boolean indicating whether the match should be exact. + @return: API response containing author authorities matching the query. + """ + url = f"{self.API_ENDPOINT}/submission/vocabularies/{authority_type}/entries" + + params = { + "metadata": metadata, + "filter": filter_text, + "exact": str(exact), + } + + response = self.api_get(url, params=params) + + if response.status_code == 200: + return parse_json(response) # Retourner le JSON parsé + else: + logging.error( + f"Error when retrieving author autorithy: {response.status_code}" + ) + return None From d0f20b64a3ffe0536feadecbe1b10ce3d3901a67 Mon Sep 17 00:00:00 2001 From: Julien Sicot Date: Tue, 7 Jan 2025 07:33:03 +0100 Subject: [PATCH 04/12] handle remove operation when patching --- dspace_rest_client/client.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 9e59012..0a21bdf 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -1118,11 +1118,24 @@ def update_workspaceitem(self, workspace_item_id, patch_operations): path = operation.get("path") value = operation.get("value") - if not op_type or not path or value is None: - logging.error(f"Invalid operation: {operation}") + if not op_type or not path: + logging.error( + f"Invalid operation: {operation} - Missing 'op' or 'path'" + ) continue - r = self.api_patch(url=url, operation=op_type, path=path, value=value) + # handling 'remove' operation + if op_type == "remove": + r = self.api_patch(url=url, operation=op_type, path=path, value=None) + else: + if value is None: + logging.error( + f"Invalid operation: {operation} - 'value' is required for operation '{op_type}'" + ) + continue + + r = self.api_patch(url=url, operation=op_type, path=path, value=value) + r.raise_for_status() logging.info("WorkspaceItem updated successfully") From 5eac58b20f04893555bbb97a33aa8edffd2b3d37 Mon Sep 17 00:00:00 2001 From: Julien Sicot Date: Thu, 16 Jan 2025 17:50:09 +0100 Subject: [PATCH 05/12] remove useless logging info --- dspace_rest_client/client.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 0a21bdf..a0cf38a 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -17,14 +17,14 @@ import json import logging import os - +from uuid import UUID import requests from requests import Request from dotenv import load_dotenv -from uuid import UUID from .models import * + __all__ = ['DSpaceClient'] logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO) @@ -124,7 +124,6 @@ def authenticate(self, retry=False): if r.status_code == 200: r_json = parse_json(r) if 'authenticated' in r_json and r_json['authenticated'] is True: - logging.info(f'Authenticated successfully') return r_json['authenticated'] # Default, return false return False From fd4da397b0c82682146479e7271f5132d8859b10 Mon Sep 17 00:00:00 2001 From: Julien Sicot Date: Tue, 11 Feb 2025 22:30:11 +0100 Subject: [PATCH 06/12] update upload_file_to_workspace --- dspace_rest_client/client.py | 63 ++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 24 deletions(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index a0cf38a..f910654 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -17,6 +17,7 @@ import json import logging import os +from pathlib import Path from uuid import UUID import requests from requests import Request @@ -1227,6 +1228,7 @@ def import_unpaywall_fulltext(self, workspace_item_id): logging.error(f"Request failed: {e}") return False + def upload_file_to_workspace(self, workspace_id, file_path): """ Upload a file to a workspace item in DSpace. @@ -1237,31 +1239,44 @@ def upload_file_to_workspace(self, workspace_id, file_path): """ url = f"{self.API_ENDPOINT}/submission/workspaceitems/{workspace_id}" - # Open the file in binary mode - with open(file_path, "rb") as file: - files = { - "file": (file_path, file), - } - headers = { - "accept": "*/*", - "access": self.ACCESS_TOKEN, - "Authorization": f"Bearer {self.API_TOKEN}", - } - - # Perform the request to upload the file - response = self.session.post(url, headers=headers, files=files) - - # Log the status and return the response - if response.status_code == 200 or response.status_code == 201: - logging.info( - f"File uploaded successfully to workspace item {workspace_id}" - ) - else: - logging.error( - f"Failed to upload file to workspace item {workspace_id}. Status: {response.status_code}. Response: {response.text}" - ) + # Vérifier que file_path est bien une instance de Path et convertir en string + if isinstance(file_path, Path): + file_path = file_path.resolve() # S'assurer qu'il est absolu + + if not file_path or not os.path.exists(file_path): + logging.error(f"PDF file {file_path} not found.") + return None - return response + try: + # Ouvrir le fichier en mode binaire + with open(file_path, "rb") as file: + files = { + "file": (str(file_path.name), file), # Convertir file_path en string + } + headers = { + "accept": "*/*", + "access": self.ACCESS_TOKEN, + "Authorization": f"Bearer {self.API_TOKEN}", + } + + # Effectuer la requête POST pour uploader le fichier + response = self.session.post(url, headers=headers, files=files) + + # Vérifier la réponse + if response.status_code in [200, 201]: + logging.info( + f"File uploaded successfully to workspace item {workspace_id}" + ) + else: + logging.error( + f"Failed to upload file to workspace item {workspace_id}. Status: {response.status_code}. Response: {response.text}" + ) + + return response + + except Exception as e: + logging.error(f"Erreur lors de l'upload du fichier {file_path} : {e}") + return None def delete_workspace_item(self, workspace_item_id): """ From a407882070abb7335af6d9f7ccf7e0ab4912f4d3 Mon Sep 17 00:00:00 2001 From: Julien Sicot Date: Fri, 4 Apr 2025 07:50:28 +0200 Subject: [PATCH 07/12] added two functions for retrieving suggestion (publication claim) from dspace-cris --- dspace_rest_client/client.py | 171 ++++++++++++++++++++++++++++++++++- 1 file changed, 170 insertions(+), 1 deletion(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index f910654..2a394af 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -1228,7 +1228,6 @@ def import_unpaywall_fulltext(self, workspace_item_id): logging.error(f"Request failed: {e}") return False - def upload_file_to_workspace(self, workspace_id, file_path): """ Upload a file to a workspace item in DSpace. @@ -1331,3 +1330,173 @@ def get_authority(self, authority_type="AuthorAuthority", metadata="dc.contribut f"Error when retrieving author autorithy: {response.status_code}" ) return None + + def get_external_suggestions( + self, page=0, size=50, sort="display,ASC", source="orcidWorks" + ): + """ + Retrieve all ORCID suggestions from the DSpace REST API. + + This method queries the API to fetch all suggestion targets based on the specified source (e.g., ORCID works), + handling pagination automatically to retrieve all results. + + :param page: Starting page number for pagination (default: 0). + :param size: Number of results per page (default: 100). + :param sort: Sorting criteria (default: "display,ASC"). + :param source: Source of suggestions (default: "orcidWorks"). + :return: A list of all suggestion targets or None if an error occurs. + """ + # Construct the API endpoint URL + url = f"{self.API_ENDPOINT}/integration/suggestiontargets/search/findBySource" + + # Define query parameters + params = {"page": page, "size": size, "sort": sort, "source": source} + + all_suggestions = [] # To store all results + + try: + while True: + # Perform the GET request + response = self.api_get(url, params=params) + + # Check if the response is successful + if response.status_code == 200: + data = parse_json(response) + + if not data: + logging.error("Empty or invalid JSON response.") + return None + + # Extract suggestions from the current page + suggestions = data.get("_embedded", {}).get("suggestiontargets", []) + + if not suggestions: + logging.info("No ORCID suggestions found in the current page.") + else: + all_suggestions.extend(suggestions) + + # Check if there is a next page + links = data.get("_links", {}) + if "next" in links: + # Update URL and params for the next page + next_url = links["next"]["href"] + url = next_url # Use the next URL directly + params = None # Clear params since next_url includes them + else: + break # No more pages to fetch + + else: + logging.error( + f"Failed to retrieve ORCID suggestions: {response.status_code} - {response.text}" + ) + return None + + return all_suggestions + + except requests.exceptions.RequestException as e: + logging.error( + f"Request exception occurred while retrieving ORCID suggestions: {e}" + ) + return None + + + def get_suggestions_by_target( + self, target, page=0, size=50, sort="trust,DESC", source="orcidWorks" + ): + """ + Retrieve all suggestions associated with a specific target (profile) from the DSpace REST API. + + This method queries the API to fetch all suggestion targets for a given profile, + handling pagination automatically to retrieve all results. + + Filters the metadata to include only 'id', 'display', 'metadata.dc.date.issued', 'metadata.dc.title', + and adds a new column 'pubyear' based on the year extracted from 'dc.date.issued'. + + :param target: UUID of the target profile. + :param page: Starting page number for pagination (default: 0). + :param size: Number of results per page (default: 50). + :param sort: Sorting criteria (default: "trust,DESC"). + :param source: Source of the suggestions (default: "orcidWorks"). + :return: A list of all filtered suggestions or None if an error occurs. + """ + # Construct the API endpoint URL + url = f"{self.API_ENDPOINT}/integration/suggestions/search/findByTargetAndSource" + + # Define query parameters + params = { + "page": page, + "size": size, + "sort": sort, + "target": target, + "source": source, + } + + all_suggestions = [] # To store all results + + try: + while True: + # Perform the GET request + response = self.api_get(url, params=params) + + # Check if the response is successful + if response.status_code == 200: + data = parse_json(response) + if not data: + logging.error("Empty or invalid JSON response.") + return None + + # Extract suggestions from the current page + suggestions = data.get("_embedded", {}).get("suggestions", []) + for suggestion in suggestions: + # Extract and filter metadata + date_issued = [ + entry.get("value") + for entry in suggestion.get("metadata", {}).get( + "dc.date.issued", [] + ) + ] + + # Extract publication year (first 4 digits of the first date issued) + pubyear = None + if date_issued: + try: + pubyear = date_issued[0][:4] # Extract the year part + except IndexError: + pubyear = None + + filtered_suggestion = { + "id": suggestion.get("id"), + "display": suggestion.get("display"), + "dc.date.issued": date_issued, + "dc.title": [ + entry.get("value") + for entry in suggestion.get("metadata", {}).get( + "dc.title", [] + ) + ], + "pubyear": pubyear, # Add publication year column + } + all_suggestions.append(filtered_suggestion) + + # Check if there is a next page + links = data.get("_links", {}) + if "next" in links: + next_url = links["next"]["href"] + url = next_url # Use the next URL directly + params = None # Clear params since next_url includes them + else: + break # No more pages to fetch + + else: + logging.error( + f"Failed to retrieve suggestions for target {target}: {response.status_code} - {response.text}" + ) + return None + + return {"suggestions": all_suggestions} + + except requests.exceptions.RequestException as e: + logging.error( + f"Request exception occurred while retrieving suggestions for target {target}: {e}" + ) + return None From e60fc0323548e9b3c54b08135133024daad741a6 Mon Sep 17 00:00:00 2001 From: Julien Sicot Date: Tue, 6 May 2025 17:57:30 +0200 Subject: [PATCH 08/12] add method to remove workflow item --- dspace_rest_client/client.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 2a394af..e4791cd 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -1302,6 +1302,32 @@ def delete_workspace_item(self, workspace_item_id): logging.error(f"Failed to delete workspace item {workspace_item_id}: {response.status_code} - {response.text}") return response + + def delete_workflow_item(self, workflow_item_id): + """ + Deletes a workspace item in DSpace by its ID. + :param workspace_item_id: ID of the workspace item to delete. + :return: Response from the API. + """ + if not workflow_item_id: + logging.error("Workflow item ID must be provided.") + return None + + # Construct the URL for the DELETE request + url = f"{self.API_ENDPOINT}/workflow/workflowitems/{workflow_item_id}?expunge=true" + + # Perform the DELETE request + response = self.api_delete(url) + + # Check for successful deletion + if response.status_code == 204: + logging.info(f"Workflow item {workflow_item_id} deleted successfully.") + elif response.status_code == 404: + logging.warning(f"Workflow item {workflow_item_id} not found.") + else: + logging.error(f"Failed to delete Workflow item {workflow_item_id}: {response.status_code} - {response.text}") + + return response def get_authority(self, authority_type="AuthorAuthority", metadata="dc.contributor.author", filter_text="", exact=False): """ From e108b9afa56317ec2e91eecd5c37fcfee2260dbc Mon Sep 17 00:00:00 2001 From: Julien Sicot Date: Tue, 8 Jul 2025 16:50:04 +0200 Subject: [PATCH 09/12] added function for adding file and updating item in fulladmin (cherry picked from commit 7f946e240dec0993a9f6661184ab0dfa5a4b003c) --- dspace_rest_client/client.py | 120 ++++++++++++++++++++++++++++++++++- 1 file changed, 117 insertions(+), 3 deletions(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index e4791cd..fc47c69 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -1147,6 +1147,63 @@ def update_workspaceitem(self, workspace_item_id, patch_operations): logging.error("Response content: %s", r.content) return False + def update_adminitem(self, uuid, patch_operations, embed="item"): + """ + Apply one or more JSON Patch operations to an admin-editable item via /core/edititems/{uuid}:FULLADMIN. + + :param uuid: UUID of the item to update. + :param patch_operations: List of patch operations (dicts with 'op', 'path', and optionally 'value'). + :param embed: Optional embed parameter in the URL (default: 'item'). + :return: Parsed JSON response if successful, else False. + """ + if not uuid: + logging.error("UUID of admin item is required.") + return False + + if not isinstance(patch_operations, list) or not patch_operations: + logging.error("patch_operations must be a non-empty list.") + return False + + url = f"{self.API_ENDPOINT}/core/edititems/{uuid}:FULLADMIN" + if embed: + url += f"?embed={embed}" + + r = None + + try: + for operation in patch_operations: + op_type = operation.get("op") + path = operation.get("path") + value = operation.get("value") + + if not op_type or not path: + logging.error( + f"Invalid operation: {operation} - Missing 'op' or 'path'" + ) + continue + + if op_type == "remove": + r = self.api_patch(url=url, operation=op_type, path=path, value=None) + else: + if value is None: + logging.error( + f"Invalid operation: {operation} - 'value' is required for operation '{op_type}'" + ) + continue + + r = self.api_patch(url=url, operation=op_type, path=path, value=value) + + r.raise_for_status() + + logging.info(f"Admin item {uuid} updated successfully.") + return parse_json(r) + + except requests.exceptions.RequestException as e: + logging.error(f"Request failed during admin item update: {e}") + if r is not None: + logging.error("Response content: %s", r.content) + return False + def create_workflowitem(self, workspace_id): """ Create workflow item from workspace item ID. @@ -1238,7 +1295,6 @@ def upload_file_to_workspace(self, workspace_id, file_path): """ url = f"{self.API_ENDPOINT}/submission/workspaceitems/{workspace_id}" - # Vérifier que file_path est bien une instance de Path et convertir en string if isinstance(file_path, Path): file_path = file_path.resolve() # S'assurer qu'il est absolu @@ -1277,6 +1333,65 @@ def upload_file_to_workspace(self, workspace_id, file_path): logging.error(f"Erreur lors de l'upload du fichier {file_path} : {e}") return None + + def add_file_adminitem(self, uuid, file_path): + """ + Upload a file to an admin-editable item via /core/edititems/{uuid}:FULLADMIN. + Sends the CSRF token extracted from the cookie as a header. + """ + if not uuid: + logging.error("UUID of the edit item must be provided.") + return None + + if isinstance(file_path, Path): + file_path = file_path.resolve() + + if not os.path.exists(file_path): + logging.error(f"File not found at path: {file_path}") + return None + + # URL with :FULLADMIN + edit_token = f"{uuid}:FULLADMIN" + url = f"{self.API_ENDPOINT}/core/edititems/{edit_token}" + + try: + with open(file_path, "rb") as file: + files = { + "file": (os.path.basename(file_path), file), + } + + # 🔐 Extract CSRF token from cookies + csrf_cookie = self.session.cookies.get("DSPACE-XSRF-COOKIE") + + if not csrf_cookie: + logging.error("CSRF cookie not found. Have you authenticated?") + return None + + headers = { + "accept": "*/*", + "Authorization": f"Bearer {self.API_TOKEN}", + "X-XSRF-TOKEN": csrf_cookie, # ✅ Required CSRF header + } + + response = self.session.post(url, headers=headers, files=files) + + # Update CSRF token if a new one is sent + self.update_token(response) + + if response.status_code in [200, 201]: + logging.info(f"File successfully uploaded to admin item {uuid}") + else: + logging.error( + f"Failed to upload file to admin item {uuid}. " + f"Status: {response.status_code}. Response: {response.text}" + ) + + return response + + except Exception as e: + logging.error(f"Error uploading file to admin item {uuid}: {e}") + return None + def delete_workspace_item(self, workspace_item_id): """ Deletes a workspace item in DSpace by its ID. @@ -1302,7 +1417,7 @@ def delete_workspace_item(self, workspace_item_id): logging.error(f"Failed to delete workspace item {workspace_item_id}: {response.status_code} - {response.text}") return response - + def delete_workflow_item(self, workflow_item_id): """ Deletes a workspace item in DSpace by its ID. @@ -1425,7 +1540,6 @@ def get_external_suggestions( ) return None - def get_suggestions_by_target( self, target, page=0, size=50, sort="trust,DESC", source="orcidWorks" ): From 82a0d71a8dc11644aa5396625cc1809d9b4d8e25 Mon Sep 17 00:00:00 2001 From: Julien Sicot Date: Fri, 11 Jul 2025 20:29:00 +0200 Subject: [PATCH 10/12] add get_facet_values method --- dspace_rest_client/client.py | 50 +++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index fc47c69..0976f03 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -1333,7 +1333,6 @@ def upload_file_to_workspace(self, workspace_id, file_path): logging.error(f"Erreur lors de l'upload du fichier {file_path} : {e}") return None - def add_file_adminitem(self, uuid, file_path): """ Upload a file to an admin-editable item via /core/edititems/{uuid}:FULLADMIN. @@ -1640,3 +1639,52 @@ def get_suggestions_by_target( f"Request exception occurred while retrieving suggestions for target {target}: {e}" ) return None + + def get_facet_values( + self, facet_name, query=None, configuration=None, size=10, page=0, sort=None + ): + """ + Retrieve the values for a specific discovery facet from the DSpace REST API. + + This function is useful for inspecting facet counts (e.g., departments, years, labs) + in the context of a search query. + + :param facet_name: Name of the facet (e.g. 'unitOrLab') + :param query: Optional Lucene-style search query (e.g. 'entityType:(Publication) and dateIssued.year:(2023)') + :param configuration: Optional discovery configuration (e.g. 'researchoutputs') + :param size: Number of facet values to return (default: 10) + :param page: Result page to retrieve (default: 0) + :param sort: Optional sort order (not always supported) + :return: A list of dicts with 'label' and 'count' per facet value, or None if request fails + """ + url = f"{self.API_ENDPOINT}/discover/facets/{facet_name}" + params = { + "page": page, + "size": size, + } + + # Add optional parameters + if query: + params["query"] = query + if configuration: + params["configuration"] = configuration + if sort: + params["sort"] = sort + + # Make the API request + response = self.api_get(url, params=params) + + if response.status_code != 200: + logging.error( + f"Error retrieving facet '{facet_name}': {response.status_code} - {response.text}" + ) + return None + + # Parse the JSON response + data = parse_json(response) + if not data or "_embedded" not in data or "values" not in data["_embedded"]: + logging.warning(f"No facet values found for '{facet_name}'") + return [] + + # Return the facet values + return data["_embedded"]["values"] From 28b150dbd8d5efa818f9896efea967f334a14be5 Mon Sep 17 00:00:00 2001 From: Julien Sicot Date: Thu, 31 Jul 2025 09:26:09 +0200 Subject: [PATCH 11/12] added submitter infos to data model --- dspace_rest_client/models.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/dspace_rest_client/models.py b/dspace_rest_client/models.py index de463ac..3cb8bfd 100644 --- a/dspace_rest_client/models.py +++ b/dspace_rest_client/models.py @@ -106,6 +106,8 @@ class DSpaceObject(HALResource): uuid = None name = None handle = None + submitterName = None + submitterEmail = None metadata = {} lastModified = None type = None @@ -134,6 +136,10 @@ def __init__(self, api_resource=None, dso=None): self.name = api_resource['name'] if 'handle' in api_resource: self.handle = api_resource['handle'] + if "submitterName" in api_resource: + self.submitterName = api_resource["submitterName"] + if "submitterEmail" in api_resource: + self.submitterEmail = api_resource["submitterEmail"] if 'metadata' in api_resource: self.metadata = api_resource['metadata'].copy() # Python interprets _ prefix as private so for now, renaming this and handling it separately @@ -197,6 +203,8 @@ def as_dict(self): 'uuid': self.uuid, 'name': self.name, 'handle': self.handle, + 'submitterName': self.submitterName, + 'submitterEmail': self.submitterEmail, 'metadata': self.metadata, 'lastModified': self.lastModified, 'type': self.type, @@ -516,5 +524,3 @@ class RelationshipType(AddressableHALResource): """ def __init__(self, api_resource): super(RelationshipType, self).__init__(api_resource) - - From 5475e73f5f4fdc2a840b438e7cb62a450f2fd64a Mon Sep 17 00:00:00 2001 From: Julien Sicot Date: Wed, 8 Oct 2025 14:29:21 +0200 Subject: [PATCH 12/12] add count_results function --- dspace_rest_client/client.py | 41 ++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 0976f03..97780e1 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -408,6 +408,47 @@ def search_objects(self, query=None, filters=None, page=0, size=20, sort=None, d return dsos + def count_results( + self, query=None, filters=None, dso_type=None, configuration=None, scope=None + ): + """ + Compte le nombre total d'objets trouvés par une recherche dans DSpace. + + @param query: chaîne de recherche + @param filters: filtres discovery sous forme de dict ex: {'f.entityType': 'Publication,equals', ... } + @param dso_type: type de DSO pour restreindre les résultats + @param configuration: configuration de recherche (ex: 'researchoutputs', 'person', etc.) + @param scope: uuid pour restreindre le périmètre (collection, communauté, etc.) + @return: entier représentant le nombre total d’items + """ + if filters is None: + filters = {} + + url = f"{self.API_ENDPOINT}/discover/search/objects" + params = {} + + if query is not None: + params["query"] = query + if configuration is not None: + params["configuration"] = configuration + if scope is not None: + params["scope"] = scope + if dso_type is not None: + params["dsoType"] = dso_type + + # inutile de demander beaucoup de résultats, 1 suffit pour récupérer la pagination + params["size"] = 1 + params["page"] = 0 + + try: + r_json = self.fetch_resource(url=url, params={**params, **filters}) + total_elements = r_json["_embedded"]["searchResult"]["page"][ + "totalElements" + ] + return total_elements + except (KeyError, TypeError, ValueError) as err: + logging.error(f"Erreur en récupérant le nombre total d'objets : {err}") + def fetch_resource(self, url, params=None): """ Simple function for higher-level 'get' functions to use whenever they want