Source code for patty.segmentation.dbscan

"""
Point cloud segmentation using the DBSCAN clustering algorithm.

DBSCAN - Density-Based Spatial Clustering of Applications with Noise.
Finds core samples of high density and expands clusters from them.
Good for data which contains clusters of similar density.

See the scikit-learn documentation for reference:
http://scikit-learn.org/stable/modules/generated/sklearn.cluster.DBSCAN.html.
"""
import numpy as np
from sklearn.cluster import dbscan
from patty.utils import extract_mask


[docs]def dbscan_labels(pointcloud, epsilon, minpoints, rgb_weight=0, algorithm='ball_tree'): ''' Find an array of point-labels of clusters found by the DBSCAN algorithm. Parameters ---------- pointcloud : pcl.PointCloud Input pointcloud. epsilon : float Neighborhood radius for DBSCAN. minpoints : integer Minimum neighborhood density for DBSCAN. rgb_weight : float, optional If non-zero, cluster on color information as well as location; specifies the relative weight of the RGB components to spatial coordinates in distance computations. (RGB values have wildly different scales than spatial coordinates.) Returns ------- labels : Sequence A sequence of labels per point. Label -1 indicates a point does not belong to any cluster, other labels indicate the cluster number a point belongs to. ''' if rgb_weight > 0: X = pointcloud.to_array() X[:, 3:] *= rgb_weight else: X = pointcloud _, labels = dbscan(X, eps=epsilon, min_samples=minpoints, algorithm=algorithm) return np.asarray(labels)
[docs]def segment_dbscan(pointcloud, epsilon, minpoints, **kwargs): """Run the DBSCAN clustering+outlier detection algorithm on pointcloud. Parameters ---------- pointcloud : pcl.PointCloud Input pointcloud. epsilon : float Neighborhood radius for DBSCAN. minpoints : integer Minimum neighborhood density for DBSCAN. **kwargs : keyword arguments, optional arguments passed to _dbscan_labels Returns ------- clusters : iterable over registered PointCloud """ labels = dbscan_labels(pointcloud, epsilon, minpoints, **kwargs) return (extract_mask(pointcloud, labels == label) for label in np.unique(labels[labels != -1]))
[docs]def get_largest_dbscan_clusters(pointcloud, min_return_fragment=0.7, epsilon=0.1, minpoints=250, rgb_weight=0): ''' Finds the largest clusters containing together at least min_return_fragment of the complete point cloud. In case less points belong to clusters, all clustered points are returned. Parameters ---------- pointcloud : pcl.PointCloud Input pointcloud. min_return_fragment : float Minimum desired fragment of pointcloud to be returned epsilon : float Neighborhood radius for DBSCAN. minpoints : integer Minimum neighborhood density for DBSCAN. rgb_weight : float, optional If non-zero, cluster on color information as well as location; specifies the relative weight of the RGB components to spatial coordinates in distance computations. (RGB values have wildly different scales than spatial coordinates.) Returns ------- cluster : pcl.PointCloud Registered pointcloud of the largest cluster found by dbscan. ''' labels = dbscan_labels(pointcloud, epsilon, minpoints, rgb_weight=rgb_weight).astype(np.int64) selection, selected_count = _get_top_labels(labels, min_return_fragment) # No clusters were found if selected_count < min_return_fragment * len(labels): return extract_mask(pointcloud, np.ones(len(pointcloud), dtype=bool)) else: mask = [label in selection for label in labels] return extract_mask(pointcloud, mask)
def _get_top_labels(labels, min_return_fragment): """Return labels of the smallest set of clusters that contain at least min_return_fragment of the points (or everything).""" # +1 to make bincount happy, [1:] to get rid of outliers. bins = np.bincount(labels + 1)[1:] labelbinpairs = sorted(enumerate(bins), key=lambda x: x[1]) total = len(labels) minimum = min_return_fragment * total selected = [] selected_count = 0 while selected_count < minimum and len(labelbinpairs) > 0: label, count = labelbinpairs.pop() selected.append(label) selected_count += count return selected, selected_count