Skip to content

Commit

Permalink
Merge branch 'master' into fix/remove-cython-from-install-requires
Browse files Browse the repository at this point in the history
  • Loading branch information
trxcllnt authored Jul 22, 2024
2 parents bd486d0 + c201b2e commit 8e76624
Show file tree
Hide file tree
Showing 13 changed files with 29 additions and 36 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/pythonpublish_wheel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ jobs:
other-deploy:
strategy:
matrix:
python: ["3.8", "3.9", "3.10", "3.11"]
python: ["3.9", "3.10", "3.11", "3.12"]
os: [windows-2019, macos-11]
runs-on: ${{ matrix.os }}
steps:
Expand All @@ -52,7 +52,7 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install setuptools build wheel twine
pip install "cython<3" oldest-supported-numpy
pip install cython "numpy>=2"
- name: Build wheel
run: |
python -m build --no-isolation
Expand Down
27 changes: 9 additions & 18 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,24 +27,6 @@ stages:
- job: run_platform_tests
strategy:
matrix:
mac_py37:
imageName: 'macOS-latest'
python.version: '3.7'
linux_py37:
imageName: 'ubuntu-latest'
python.version: '3.7'
windows_py37:
imageName: 'windows-latest'
python.version: '3.7'
mac_py38:
imageName: 'macOS-latest'
python.version: '3.8'
linux_py38:
imageName: 'ubuntu-latest'
python.version: '3.8'
windows_py38:
imageName: 'windows-latest'
python.version: '3.8'
mac_py39:
imageName: 'macOS-latest'
python.version: '3.9'
Expand Down Expand Up @@ -72,6 +54,15 @@ stages:
windows_py311:
imageName: 'windows-latest'
python.version: '3.11'
mac_py312:
imageName: 'macOS-latest'
python.version: '3.12'
linux_py312:
imageName: 'ubuntu-latest'
python.version: '3.12'
windows_py312:
imageName: 'windows-latest'
python.version: '3.12'
pool:
vmImage: $(imageName)

Expand Down
2 changes: 1 addition & 1 deletion docs/how_to_use_epsilon.rst
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ In our example, we choose to merge nested clusters below 5 meters (0.005 kilomet
X = np.radians(coordinates) #convert the list of lat/lon coordinates to radians
earth_radius_km = 6371
epsilon = 0.005 / earth_radius #calculate 5 meter epsilon threshold
epsilon = 0.005 / earth_radius_km #calculate 5 meter epsilon threshold
clusterer = hdbscan.HDBSCAN(min_cluster_size=4, metric='haversine',
cluster_selection_epsilon=epsilon, cluster_selection_method = 'eom')
Expand Down
2 changes: 1 addition & 1 deletion docs/parameter_selection.rst
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ Selecting ``min_samples``
Since we have seen that ``min_samples`` clearly has a dramatic effect on
clustering, the question becomes: how do we select this parameter? The
simplest intuition for what ``min_samples`` does is provide a measure of
how conservative you want you clustering to be. The larger the value of
how conservative you want your clustering to be. The larger the value of
``min_samples`` you provide, the more conservative the clustering --
more points will be declared as noise, and clusters will be restricted
to progressively more dense areas. We can see this in practice by
Expand Down
4 changes: 2 additions & 2 deletions hdbscan/_hdbscan_linkage.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ cpdef np.ndarray[np.double_t, ndim=2] mst_linkage_core(
result = np.zeros((distance_matrix.shape[0] - 1, 3))
node_labels = np.arange(distance_matrix.shape[0], dtype=np.intp)
current_node = 0
current_distances = np.infty * np.ones(distance_matrix.shape[0])
current_distances = np.inf * np.ones(distance_matrix.shape[0])
current_labels = node_labels
for i in range(1, node_labels.shape[0]):
label_filter = current_labels != current_node
Expand Down Expand Up @@ -100,7 +100,7 @@ cpdef np.ndarray[np.double_t, ndim=2] mst_linkage_core_vector(
result_arr = np.zeros((dim - 1, 3))
in_tree_arr = np.zeros(dim, dtype=np.int8)
current_node = 0
current_distances_arr = np.infty * np.ones(dim)
current_distances_arr = np.inf * np.ones(dim)
current_sources_arr = np.ones(dim)

result = (<np.double_t[:dim - 1, :3:1]> (<np.double_t *> result_arr.data))
Expand Down
2 changes: 1 addition & 1 deletion hdbscan/_hdbscan_reachability.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ cpdef sparse_mutual_reachability(object lil_matrix, np.intp_t min_points=5,
if min_points - 1 < len(sorted_row_data):
core_distance[i] = sorted_row_data[min_points - 1]
else:
core_distance[i] = np.infty
core_distance[i] = np.inf

if alpha != 1.0:
lil_matrix = lil_matrix / alpha
Expand Down
8 changes: 5 additions & 3 deletions hdbscan/_hdbscan_tree.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ cpdef np.ndarray condense_tree(np.ndarray[np.double_t, ndim=2] hierarchy,
cdef list result_list

cdef np.ndarray[np.intp_t, ndim=1] relabel
cdef np.ndarray[np.int_t, ndim=1] ignore
cdef np.ndarray[np.int8_t, ndim=1] ignore
cdef np.ndarray[np.double_t, ndim=1] children

cdef np.intp_t node
Expand All @@ -91,7 +91,7 @@ cpdef np.ndarray condense_tree(np.ndarray[np.double_t, ndim=2] hierarchy,
relabel = np.empty(root + 1, dtype=np.intp)
relabel[root] = num_points
result_list = []
ignore = np.zeros(len(node_list), dtype=int)
ignore = np.zeros(len(node_list), dtype=np.int8)

for node in node_list:
if ignore[node] or node < num_points:
Expand Down Expand Up @@ -251,7 +251,7 @@ cdef list bfs_from_cluster_tree(np.ndarray tree, np.intp_t bfs_root):

while to_process.shape[0] > 0:
result.extend(to_process.tolist())
to_process = tree['child'][np.in1d(tree['parent'], to_process)]
to_process = tree['child'][np.isin(tree['parent'], to_process)]

return result

Expand Down Expand Up @@ -725,8 +725,10 @@ cpdef tuple get_clusters(np.ndarray tree, dict stability,
# if you do, change this accordingly!
if allow_single_cluster:
node_list = sorted(stability.keys(), reverse=True)
node_list = [int(n) for n in node_list]
else:
node_list = sorted(stability.keys(), reverse=True)[:-1]
node_list = [int(n) for n in node_list]
# (exclude root)

cluster_tree = tree[tree['child_size'] > 1]
Expand Down
2 changes: 1 addition & 1 deletion hdbscan/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def _bfs_from_cluster_tree(tree, bfs_root):

while to_process:
result.extend(to_process)
to_process = tree['child'][np.in1d(tree['parent'], to_process)].tolist()
to_process = tree['child'][np.isin(tree['parent'], to_process)].tolist()

return result

Expand Down
2 changes: 1 addition & 1 deletion hdbscan/prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def _clusters_below(self, cluster):
while to_process:
result.extend(to_process)
to_process = \
self.cluster_tree['child'][np.in1d(self.cluster_tree['parent'],
self.cluster_tree['child'][np.isin(self.cluster_tree['parent'],
to_process)]
to_process = to_process.tolist()

Expand Down
4 changes: 2 additions & 2 deletions hdbscan/validity.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,8 @@ def internal_minimum_spanning_tree(mr_distances):
# Keep only the edges whose two endpoints are both in ``vertices``:
# test each endpoint for membership, take the product over the two
# endpoints (acting as a logical AND), then convert back to boolean.
edge_selection = np.prod(np.in1d(min_span_tree.T[:2], vertices).reshape(
(min_span_tree.shape[0], 2), order='F'), axis=1).astype(bool)
edge_selection = np.prod(
np.isin(min_span_tree.T[:2], vertices), axis=0).astype(bool)

# Density sparseness is not well defined if there are no
# internal edges (as per the referenced paper). However
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@
requires = [
"setuptools",
"wheel",
"cython<3",
"oldest-supported-numpy"
"cython<4",
"numpy<3"
]
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
numpy>=1.20
numpy>=1.20,<2
scipy>= 1.0
scikit-learn>=0.20
joblib>=1.0
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def requirements():

configuration = {
'name': 'hdbscan',
'version': '0.8.34-rc1',
'version': '0.8.37',
'description': 'Clustering based on density with variable density clusters',
'long_description': readme(),
'classifiers': [
Expand Down

0 comments on commit 8e76624

Please sign in to comment.