From 47ae0e2cc20f161e6ac5b5f1a87885cd7b945444 Mon Sep 17 00:00:00 2001
From: Daniel Mietchen
Date: Tue, 30 Apr 2024 16:47:57 +0200
Subject: [PATCH 01/13] Typo fix in parameter_selection.rst

---
 docs/parameter_selection.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/parameter_selection.rst b/docs/parameter_selection.rst
index 1952cd7f..0f459df3 100644
--- a/docs/parameter_selection.rst
+++ b/docs/parameter_selection.rst
@@ -128,7 +128,7 @@ Selecting ``min_samples``
 Since we have seen that ``min_samples`` clearly has a dramatic effect on
 clustering, the question becomes: how do we select this parameter? The
 simplest intuition for what ``min_samples`` does is provide a measure of
-how conservative you want you clustering to be. The larger the value of
+how conservative you want your clustering to be. The larger the value of
 ``min_samples`` you provide, the more conservative the clustering --
 more points will be declared as noise, and clusters will be restricted
 to progressively more dense areas. We can see this in practice by

From c9933c8cd8693bbd23618c22eadec03c2bd0d420 Mon Sep 17 00:00:00 2001
From: cearlefraym <97044512+cearlefraym@users.noreply.github.com>
Date: Wed, 22 May 2024 14:31:32 -0400
Subject: [PATCH 02/13] correct typo in variable declaration

---
 docs/how_to_use_epsilon.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/how_to_use_epsilon.rst b/docs/how_to_use_epsilon.rst
index 970b9b97..0f7e2aec 100644
--- a/docs/how_to_use_epsilon.rst
+++ b/docs/how_to_use_epsilon.rst
@@ -43,7 +43,7 @@ In our example, we choose to merge nested clusters below 5 meters (0.005 kilomet
 
     X = np.radians(coordinates) #convert the list of lat/lon coordinates to radians
     earth_radius_km = 6371
-    epsilon = 0.005 / earth_radius #calculate 5 meter epsilon threshold
+    epsilon = 0.005 / earth_radius_km #calculate 5 meter epsilon threshold
 
     clusterer = hdbscan.HDBSCAN(min_cluster_size=4, metric='haversine', cluster_selection_epsilon=epsilon, cluster_selection_method = 'eom')

From a6f1c3aa01de54ad156b7ba9787e34b5803f4504 Mon Sep 17 00:00:00 2001
From: Leland McInnes
Date: Thu, 23 May 2024 19:52:15 -0400
Subject: [PATCH 03/13] Update _hdbscan_tree.pyx for newer numpy

---
 hdbscan/_hdbscan_tree.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hdbscan/_hdbscan_tree.pyx b/hdbscan/_hdbscan_tree.pyx
index 31ae7dfd..842bfa2f 100644
--- a/hdbscan/_hdbscan_tree.pyx
+++ b/hdbscan/_hdbscan_tree.pyx
@@ -71,7 +71,7 @@ cpdef np.ndarray condense_tree(np.ndarray[np.double_t, ndim=2] hierarchy,
     cdef list result_list
 
     cdef np.ndarray[np.intp_t, ndim=1] relabel
-    cdef np.ndarray[np.int_t, ndim=1] ignore
+    cdef np.ndarray[np.int8_t, ndim=1] ignore
     cdef np.ndarray[np.double_t, ndim=1] children
 
     cdef np.intp_t node

From 34be7d839e9c47a333a5accd5c56ed4e4d29e72b Mon Sep 17 00:00:00 2001
From: Leland McInnes
Date: Thu, 23 May 2024 19:53:54 -0400
Subject: [PATCH 04/13] Update setup.py

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 87f15a8d..641a2ab7 100644
--- a/setup.py
+++ b/setup.py
@@ -51,7 +51,7 @@ def requirements():
 
 configuration = {
     'name': 'hdbscan',
-    'version': '0.8.34-rc1',
+    'version': '0.8.35',
     'description': 'Clustering based on density with variable density clusters',
     'long_description': readme(),
     'classifiers': [
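The documentation snippet fixed by patch 02 is easy to sanity-check in isolation. The sketch below exercises the corrected line end to end; the coordinates are made-up points in two tight groups, while the parameters mirror the snippet itself:

    import numpy as np
    import hdbscan

    # Two made-up groups of nearby points (lat/lon in degrees), roughly 1 km apart.
    coordinates = np.array([
        [45.42150, -75.69720], [45.42151, -75.69721], [45.42152, -75.69722],
        [45.42153, -75.69723], [45.42154, -75.69724],
        [45.43000, -75.70000], [45.43001, -75.70001], [45.43002, -75.70002],
        [45.43003, -75.70003], [45.43004, -75.70004],
    ])

    X = np.radians(coordinates)  # the haversine metric expects lat/lon in radians
    earth_radius_km = 6371
    epsilon = 0.005 / earth_radius_km  # 5 meters as an angle on the unit sphere

    clusterer = hdbscan.HDBSCAN(min_cluster_size=4, metric='haversine',
                                cluster_selection_epsilon=epsilon,
                                cluster_selection_method='eom')
    clusterer.fit(X)
    print(clusterer.labels_)  # one label per point; -1 marks noise

Dividing by the Earth's radius is what converts the 5-meter threshold into the radian units the haversine metric works in, which is why the corrected variable name matters.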
From aa99a71daa070b64b45018378c9d4bdd9c28f686 Mon Sep 17 00:00:00 2001
From: Leland McInnes
Date: Fri, 24 May 2024 08:12:50 -0400
Subject: [PATCH 05/13] Update _hdbscan_tree.pyx

---
 hdbscan/_hdbscan_tree.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hdbscan/_hdbscan_tree.pyx b/hdbscan/_hdbscan_tree.pyx
index 842bfa2f..d3c25191 100644
--- a/hdbscan/_hdbscan_tree.pyx
+++ b/hdbscan/_hdbscan_tree.pyx
@@ -91,7 +91,7 @@ cpdef np.ndarray condense_tree(np.ndarray[np.double_t, ndim=2] hierarchy,
     relabel = np.empty(root + 1, dtype=np.intp)
     relabel[root] = num_points
     result_list = []
-    ignore = np.zeros(len(node_list), dtype=int)
+    ignore = np.zeros(len(node_list), dtype=np.int8)
 
     for node in node_list:
         if ignore[node] or node < num_points:

From 5d75efb376326aeeb46f1d40f982569be4ebb907 Mon Sep 17 00:00:00 2001
From: Leland McInnes
Date: Fri, 24 May 2024 12:20:11 -0400
Subject: [PATCH 06/13] Update setup.py

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 641a2ab7..3da85105 100644
--- a/setup.py
+++ b/setup.py
@@ -51,7 +51,7 @@ def requirements():
 
 configuration = {
     'name': 'hdbscan',
-    'version': '0.8.35',
+    'version': '0.8.36',
     'description': 'Clustering based on density with variable density clusters',
     'long_description': readme(),
     'classifiers': [

From 142c6ac125b72c8433b7915c1816b5fa0e189122 Mon Sep 17 00:00:00 2001
From: Leland McInnes
Date: Mon, 17 Jun 2024 15:29:41 -0400
Subject: [PATCH 07/13] HDBSCAN doesn't work with numpy 2 yet.

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 3532921c..a69e93d3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 cython>=0.27,<3
-numpy>=1.20
+numpy>=1.20,<2
 scipy>= 1.0
 scikit-learn>=0.20
 joblib>=1.0

From c5fcf4b3829d391eadd14598736a763952790a82 Mon Sep 17 00:00:00 2001
From: Leland McInnes
Date: Mon, 17 Jun 2024 15:31:07 -0400
Subject: [PATCH 08/13] Update setup.py with version bump

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 3da85105..729c9eee 100644
--- a/setup.py
+++ b/setup.py
@@ -51,7 +51,7 @@ def requirements():
 
 configuration = {
     'name': 'hdbscan',
-    'version': '0.8.36',
+    'version': '0.8.37',
     'description': 'Clustering based on density with variable density clusters',
     'long_description': readme(),
     'classifiers': [

From 29d8286c0cf6fe2def219c6170950f818931c5fc Mon Sep 17 00:00:00 2001
From: Sebastian Berg
Date: Wed, 3 Jul 2024 14:15:26 +0200
Subject: [PATCH 09/13] MAINT: Support NumPy 2 and build Python 3.12 wheels

---
 .github/workflows/pythonpublish_wheel.yml | 4 ++--
 pyproject.toml                            | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/pythonpublish_wheel.yml b/.github/workflows/pythonpublish_wheel.yml
index 20a68931..a1e1f52d 100644
--- a/.github/workflows/pythonpublish_wheel.yml
+++ b/.github/workflows/pythonpublish_wheel.yml
@@ -37,7 +37,7 @@ jobs:
   other-deploy:
     strategy:
       matrix:
-        python: ["3.8", "3.9", "3.10", "3.11"]
+        python: ["3.9", "3.10", "3.11", "3.12"]
         os: [windows-2019, macos-11]
     runs-on: ${{ matrix.os }}
     steps:
@@ -52,7 +52,7 @@ jobs:
        run: |
          python -m pip install --upgrade pip
          pip install setuptools build wheel twine
-         pip install "cython<3" oldest-supported-numpy
+         pip install cython "numpy>=2"
      - name: Build wheel
        run: |
          python -m build --no-isolation
diff --git a/pyproject.toml b/pyproject.toml
index d65564a7..9ea88166 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,6 +2,6 @@
 requires = [
     "setuptools",
     "wheel",
-    "cython<3",
-    "oldest-supported-numpy"
+    "cython<4",
+    "numpy<3"
 ]
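Patches 03 and 05 pair up: the Cython buffer declaration for ``ignore`` becomes ``np.int8_t``, so the array construction must use an explicit ``np.int8`` rather than the platform-dependent ``dtype=int``. The difference is visible from plain Python; a small illustrative sketch (variable names borrowed from the patch, values made up):

    import numpy as np

    node_list = list(range(12))

    # Old construction: the default integer width varies by platform and
    # NumPy version, so it cannot be relied on to match a fixed Cython
    # buffer type such as np.int8_t.
    ignore_old = np.zeros(len(node_list), dtype=int)

    # New construction: an explicit one-byte flag array, matching the
    # np.int8_t declaration introduced in patch 03.
    ignore_new = np.zeros(len(node_list), dtype=np.int8)

    print(ignore_old.dtype, ignore_old.itemsize)  # e.g. int64 8 (platform-dependent)
    print(ignore_new.dtype, ignore_new.itemsize)  # int8 1 everywhere

Since ``ignore`` only holds 0/1 flags, the one-byte type is also the cheaper choice.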
From 2e10d38b45100e11f9577c61a4f2d4d93973af96 Mon Sep 17 00:00:00 2001
From: Sebastian Berg
Date: Wed, 3 Jul 2024 14:39:28 +0200
Subject: [PATCH 10/13] Apply simple NumPy 2 fixes and silence most warnings

---
 hdbscan/_hdbscan_linkage.pyx      | 4 ++--
 hdbscan/_hdbscan_reachability.pyx | 2 +-
 hdbscan/_hdbscan_tree.pyx         | 2 +-
 hdbscan/plots.py                  | 2 +-
 hdbscan/prediction.py             | 2 +-
 hdbscan/validity.py               | 2 +-
 6 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/hdbscan/_hdbscan_linkage.pyx b/hdbscan/_hdbscan_linkage.pyx
index a35a958c..738ed6a2 100644
--- a/hdbscan/_hdbscan_linkage.pyx
+++ b/hdbscan/_hdbscan_linkage.pyx
@@ -33,7 +33,7 @@ cpdef np.ndarray[np.double_t, ndim=2] mst_linkage_core(
     result = np.zeros((distance_matrix.shape[0] - 1, 3))
     node_labels = np.arange(distance_matrix.shape[0], dtype=np.intp)
     current_node = 0
-    current_distances = np.infty * np.ones(distance_matrix.shape[0])
+    current_distances = np.inf * np.ones(distance_matrix.shape[0])
     current_labels = node_labels
     for i in range(1, node_labels.shape[0]):
         label_filter = current_labels != current_node
@@ -100,7 +100,7 @@ cpdef np.ndarray[np.double_t, ndim=2] mst_linkage_core_vector(
     result_arr = np.zeros((dim - 1, 3))
     in_tree_arr = np.zeros(dim, dtype=np.int8)
     current_node = 0
-    current_distances_arr = np.infty * np.ones(dim)
+    current_distances_arr = np.inf * np.ones(dim)
     current_sources_arr = np.ones(dim)
 
     result = (<np.double_t[:dim - 1, :3:1]> (<np.double_t *> result_arr.data))
diff --git a/hdbscan/_hdbscan_reachability.pyx b/hdbscan/_hdbscan_reachability.pyx
index 2863dc8a..3f4e3141 100644
--- a/hdbscan/_hdbscan_reachability.pyx
+++ b/hdbscan/_hdbscan_reachability.pyx
@@ -79,7 +79,7 @@ cpdef sparse_mutual_reachability(object lil_matrix, np.intp_t min_points=5,
         if min_points - 1 < len(sorted_row_data):
             core_distance[i] = sorted_row_data[min_points - 1]
         else:
-            core_distance[i] = np.infty
+            core_distance[i] = np.inf
 
     if alpha != 1.0:
         lil_matrix = lil_matrix / alpha
diff --git a/hdbscan/_hdbscan_tree.pyx b/hdbscan/_hdbscan_tree.pyx
index d3c25191..89c76299 100644
--- a/hdbscan/_hdbscan_tree.pyx
+++ b/hdbscan/_hdbscan_tree.pyx
@@ -251,7 +251,7 @@ cdef list bfs_from_cluster_tree(np.ndarray tree, np.intp_t bfs_root):
 
     while to_process.shape[0] > 0:
         result.extend(to_process.tolist())
-        to_process = tree['child'][np.in1d(tree['parent'], to_process)]
+        to_process = tree['child'][np.isin(tree['parent'], to_process)]
 
     return result
 
diff --git a/hdbscan/plots.py b/hdbscan/plots.py
index e82655b3..617721e5 100644
--- a/hdbscan/plots.py
+++ b/hdbscan/plots.py
@@ -28,7 +28,7 @@ def _bfs_from_cluster_tree(tree, bfs_root):
 
     while to_process:
         result.extend(to_process)
-        to_process = tree['child'][np.in1d(tree['parent'], to_process)].tolist()
+        to_process = tree['child'][np.isin(tree['parent'], to_process)].tolist()
 
     return result
 
diff --git a/hdbscan/prediction.py b/hdbscan/prediction.py
index 166975f9..10cd6c60 100644
--- a/hdbscan/prediction.py
+++ b/hdbscan/prediction.py
@@ -81,7 +81,7 @@ def _clusters_below(self, cluster):
         while to_process:
             result.extend(to_process)
             to_process = \
-                self.cluster_tree['child'][np.in1d(self.cluster_tree['parent'],
+                self.cluster_tree['child'][np.isin(self.cluster_tree['parent'],
                                            to_process)]
             to_process = to_process.tolist()
diff --git a/hdbscan/validity.py b/hdbscan/validity.py
index 8ddeb319..36c2edf7 100644
--- a/hdbscan/validity.py
+++ b/hdbscan/validity.py
@@ -180,7 +180,7 @@ def internal_minimum_spanning_tree(mr_distances):
     # A little "fancy" we select from the flattened array reshape back
     # (Fortran format to get indexing right) and take the product to do an and
     # then convert back to boolean type.
-    edge_selection = np.prod(np.in1d(min_span_tree.T[:2], vertices).reshape(
+    edge_selection = np.prod(np.isin(min_span_tree.T[:2], vertices).reshape(
         (min_span_tree.shape[0], 2), order='F'), axis=1).astype(bool)
 
     # Density sparseness is not well defined if there are no
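Patch 10 tracks two NumPy 2 API changes: ``np.infty`` is removed in favor of ``np.inf``, and ``np.in1d`` is deprecated in favor of ``np.isin``, which preserves the shape of its first argument. For the 1-D arrays used in these breadth-first traversals the two are interchangeable, as a toy version of the ``bfs_from_cluster_tree`` loop shows (the tree arrays below are invented for illustration):

    import numpy as np

    # Invented condensed-tree fragments: node 5 is the root, 6 an internal node.
    parent = np.array([5, 5, 6, 6])
    child = np.array([6, 2, 0, 1])

    to_process = [5]
    result = []
    while to_process:
        result.extend(to_process)
        # On 1-D input, np.isin is a drop-in replacement for np.in1d.
        to_process = child[np.isin(parent, to_process)].tolist()

    print(result)  # breadth-first order from the root: [5, 6, 2, 0, 1]

    # The infinity spelling that survives in NumPy 2:
    current_distances = np.inf * np.ones(4)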
From c3f3d85c4ae948596c1cdab78ba612ecaf87d5b7 Mon Sep 17 00:00:00 2001
From: Sebastian Berg
Date: Thu, 4 Jul 2024 22:59:07 +0200
Subject: [PATCH 11/13] Avoid cython error (but there should be a better way)

---
 hdbscan/_hdbscan_tree.pyx | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hdbscan/_hdbscan_tree.pyx b/hdbscan/_hdbscan_tree.pyx
index 89c76299..aeb40518 100644
--- a/hdbscan/_hdbscan_tree.pyx
+++ b/hdbscan/_hdbscan_tree.pyx
@@ -725,8 +725,10 @@ cpdef tuple get_clusters(np.ndarray tree, dict stability,
     # if you do, change this accordingly!
     if allow_single_cluster:
         node_list = sorted(stability.keys(), reverse=True)
+        node_list = [int(n) for n in node_list]
     else:
         node_list = sorted(stability.keys(), reverse=True)[:-1]
+        node_list = [int(n) for n in node_list]
         # (exclude root)
 
     cluster_tree = tree[tree['child_size'] > 1]

From 9acb4955e9539a0ba6720522182e1d70807fdec9 Mon Sep 17 00:00:00 2001
From: Sebastian Berg
Date: Thu, 4 Jul 2024 23:00:54 +0200
Subject: [PATCH 12/13] `isin` returns the vertices shape, so the reshape is incorrect now

---
 hdbscan/validity.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hdbscan/validity.py b/hdbscan/validity.py
index 36c2edf7..7b09c277 100644
--- a/hdbscan/validity.py
+++ b/hdbscan/validity.py
@@ -180,8 +180,8 @@ def internal_minimum_spanning_tree(mr_distances):
     # A little "fancy" we select from the flattened array reshape back
     # (Fortran format to get indexing right) and take the product to do an and
     # then convert back to boolean type.
-    edge_selection = np.prod(np.isin(min_span_tree.T[:2], vertices).reshape(
-        (min_span_tree.shape[0], 2), order='F'), axis=1).astype(bool)
+    edge_selection = np.prod(
+        np.isin(min_span_tree.T[:2], vertices), axis=0).astype(bool)
 
     # Density sparseness is not well defined if there are no
     # internal edges (as per the referenced paper). However

From ad235f01495a62c13e37cb77f0ec5453107a1e2c Mon Sep 17 00:00:00 2001
From: Sebastian Berg
Date: Fri, 5 Jul 2024 07:59:49 +0200
Subject: [PATCH 13/13] CI: Test with 3.12 and stop testing 3.7 and 3.8

---
 azure-pipelines.yml | 27 +++++++++------------------
 1 file changed, 9 insertions(+), 18 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 2d7b2551..6ef9b53d 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -27,50 +27,41 @@ stages:
   - job: run_platform_tests
     strategy:
       matrix:
-        mac_py37:
-          imageName: 'macOS-latest'
-          python.version: '3.7'
-        linux_py37:
-          imageName: 'ubuntu-latest'
-          python.version: '3.7'
-        windows_py37:
-          imageName: 'windows-latest'
-          python.version: '3.7'
-        mac_py38:
-          imageName: 'macOS-latest'
-          python.version: '3.8'
-        linux_py38:
-          imageName: 'ubuntu-latest'
-          python.version: '3.8'
-        windows_py38:
-          imageName: 'windows-latest'
-          python.version: '3.8'
         mac_py39:
           imageName: 'macOS-latest'
           python.version: '3.9'
         linux_py39:
           imageName: 'ubuntu-latest'
           python.version: '3.9'
         windows_py39:
           imageName: 'windows-latest'
           python.version: '3.9'
         mac_py310:
           imageName: 'macOS-latest'
           python.version: '3.10'
         linux_py310:
           imageName: 'ubuntu-latest'
           python.version: '3.10'
         windows_py310:
           imageName: 'windows-latest'
           python.version: '3.10'
         mac_py311:
           imageName: 'macOS-latest'
           python.version: '3.11'
         linux_py311:
           imageName: 'ubuntu-latest'
           python.version: '3.11'
         windows_py311:
           imageName: 'windows-latest'
           python.version: '3.11'
+        mac_py312:
+          imageName: 'macOS-latest'
+          python.version: '3.12'
+        linux_py312:
+          imageName: 'ubuntu-latest'
+          python.version: '3.12'
+        windows_py312:
+          imageName: 'windows-latest'
+          python.version: '3.12'
     pool:
       vmImage: $(imageName)
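The interplay of patches 10 and 12 is worth spelling out: ``np.in1d`` always returned a flattened result, so the original ``validity.py`` code reshaped it back into two columns, but ``np.isin`` keeps the ``(2, n_edges)`` shape of ``min_span_tree.T[:2]``, which made the old reshape wrong rather than merely redundant. A standalone check with invented edge data:

    import numpy as np

    # Invented minimum spanning tree: one edge per row as (source, target, weight).
    min_span_tree = np.array([
        [0.0, 1.0, 0.5],
        [1.0, 2.0, 0.7],
        [2.0, 3.0, 0.9],
    ])
    vertices = np.array([0, 1, 2])  # the "internal" vertices for this example

    # np.isin keeps the (2, n_edges) shape, so taking the product along
    # axis 0 ANDs the two endpoint-membership tests for each edge.
    edge_selection = np.prod(
        np.isin(min_span_tree.T[:2], vertices), axis=0).astype(bool)

    print(edge_selection)  # [ True  True False]: edge (2, 3) has an endpoint outside

This matches the final form of ``edge_selection`` in patch 12 and selects exactly the edges whose endpoints both lie in ``vertices``.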