import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;

/**
 * Minimal k-means (Lloyd's algorithm) clustering over points stored as {@code Double[]}
 * coordinate arrays.
 *
 * <p>All working state is kept in local variables, so concurrent calls to
 * {@link #kMeans(List, int, int)} do not interfere with each other.
 */
public class KMeans {

    /**
     * Clusters {@code dataPoints} into at most {@code k} groups.
     *
     * <p>Initial centroids are copies of randomly chosen data points. The algorithm then
     * alternates between assigning every point to its nearest centroid and moving each centroid
     * to the mean of its points, until the centroids stop changing or {@code maxIterations}
     * centroid updates have been performed.
     *
     * @param dataPoints points to cluster; every array is expected to have the same length and
     *     contain no {@code null} elements
     * @param k number of clusters, at least 1
     * @param maxIterations upper bound on centroid updates; values below 1 yield the assignment
     *     to the initial centroids
     * @return one zero-based cluster index per data point, in input order; empty if
     *     {@code dataPoints} is empty
     * @throws NullPointerException if {@code dataPoints} is {@code null}
     * @throws IllegalArgumentException if {@code k} is less than 1
     */
    public static List<Integer> kMeans(List<Double[]> dataPoints, int k, int maxIterations) {
        if (dataPoints == null) {
            throw new NullPointerException("dataPoints");
        }
        if (k < 1) {
            throw new IllegalArgumentException("k must be at least 1, got " + k);
        }
        if (dataPoints.isEmpty()) {
            return new ArrayList<>();
        }

        List<Double[]> centroids = initializeCentroids(dataPoints, k);
        List<Integer> clusterAssignments = assignClusters(dataPoints, centroids);

        for (int iteration = 0; iteration < maxIterations; iteration++) {
            List<Double[]> newCentroids = updateCentroids(dataPoints, clusterAssignments, centroids);
            if (converged(centroids, newCentroids)) {
                break;
            }
            centroids = newCentroids;
            // Re-assign right after moving the centroids so the returned labels always
            // correspond to the final centroid positions.
            clusterAssignments = assignClusters(dataPoints, centroids);
        }
        return clusterAssignments;
    }

    /**
     * Picks {@code k} starting centroids as copies of randomly selected data points.
     *
     * <p>Distinct points are drawn (partial Fisher-Yates shuffle) for as long as unused points
     * remain; if {@code k} exceeds the number of points, the remaining centroids are drawn with
     * replacement.
     */
    private static List<Double[]> initializeCentroids(List<Double[]> dataPoints, int k) {
        int pointCount = dataPoints.size();
        int[] indices = new int[pointCount];
        for (int i = 0; i < pointCount; i++) {
            indices[i] = i;
        }

        Random random = new Random();
        List<Double[]> centroids = new ArrayList<>();
        for (int i = 0; i < k; i++) {
            int index;
            if (i < pointCount) {
                int swapWith = i + random.nextInt(pointCount - i);
                index = indices[swapWith];
                indices[swapWith] = indices[i];
                indices[i] = index;
            } else {
                index = random.nextInt(pointCount);
            }
            // Copy so that later centroid handling can never alias caller-owned arrays.
            centroids.add(dataPoints.get(index).clone());
        }
        return centroids;
    }

    /**
     * Assigns every point to the index of its nearest centroid. Ties go to the lowest index
     * because only a strictly smaller distance replaces the current best.
     */
    private static List<Integer> assignClusters(List<Double[]> dataPoints, List<Double[]> centroids) {
        List<Integer> clusterAssignments = new ArrayList<>(dataPoints.size());
        for (Double[] point : dataPoints) {
            double minDistance = Double.MAX_VALUE;
            int cluster = 0;
            for (int i = 0; i < centroids.size(); i++) {
                double distance = calculateDistance(point, centroids.get(i));
                if (distance < minDistance) {
                    minDistance = distance;
                    cluster = i;
                }
            }
            clusterAssignments.add(cluster);
        }
        return clusterAssignments;
    }

    /** Returns the Euclidean distance between two points of equal dimension. */
    private static double calculateDistance(Double[] point1, Double[] point2) {
        double sumOfSquares = 0.0;
        for (int i = 0; i < point1.length; i++) {
            double diff = point1[i] - point2[i];
            sumOfSquares += diff * diff;
        }
        return Math.sqrt(sumOfSquares);
    }

    /**
     * Computes the mean of each cluster's points in a single pass over the data.
     *
     * <p>A cluster that received no points keeps its previous centroid; this avoids producing a
     * centroid of {@code null} coordinates that would fail in the next distance computation.
     */
    private static List<Double[]> updateCentroids(
            List<Double[]> dataPoints,
            List<Integer> clusterAssignments,
            List<Double[]> previousCentroids) {
        int clusterCount = previousCentroids.size();
        int dimensions = dataPoints.get(0).length;
        double[][] sums = new double[clusterCount][dimensions];
        int[] counts = new int[clusterCount];

        for (int p = 0; p < dataPoints.size(); p++) {
            int cluster = clusterAssignments.get(p);
            Double[] point = dataPoints.get(p);
            for (int d = 0; d < dimensions; d++) {
                sums[cluster][d] += point[d];
            }
            counts[cluster]++;
        }

        List<Double[]> newCentroids = new ArrayList<>(clusterCount);
        for (int c = 0; c < clusterCount; c++) {
            if (counts[c] == 0) {
                newCentroids.add(previousCentroids.get(c).clone());
                continue;
            }
            Double[] newCentroid = new Double[dimensions];
            for (int d = 0; d < dimensions; d++) {
                newCentroid[d] = sums[c][d] / counts[c];
            }
            newCentroids.add(newCentroid);
        }
        return newCentroids;
    }

    /**
     * Returns {@code true} when every centroid has exactly the same coordinates as before.
     *
     * <p>Coordinates are compared by value with {@link Arrays#equals(Object[], Object[])};
     * calling {@code equals} directly on an array only compares references.
     */
    private static boolean converged(List<Double[]> oldCentroids, List<Double[]> newCentroids) {
        if (oldCentroids.size() != newCentroids.size()) {
            return false;
        }
        for (int i = 0; i < oldCentroids.size(); i++) {
            if (!Arrays.equals(oldCentroids.get(i), newCentroids.get(i))) {
                return false;
            }
        }
        return true;
    }

    /** Demo: clusters six 2-D points into two groups and prints each point's cluster. */
    public static void main(String[] args) {
        List<Double[]> dataPoints = new ArrayList<>();
        dataPoints.add(new Double[]{1.0, 2.0});
        dataPoints.add(new Double[]{1.5, 1.8});
        dataPoints.add(new Double[]{5.0, 8.0});
        dataPoints.add(new Double[]{8.0, 8.0});
        dataPoints.add(new Double[]{1.0, 0.6});
        dataPoints.add(new Double[]{9.0, 11.0});

        int k = 2;
        int maxIterations = 100;
        List<Integer> clusters = kMeans(dataPoints, k, maxIterations);

        for (int i = 0; i < dataPoints.size(); i++) {
            System.out.println("Data point " + (i + 1) + " belongs to cluster " + (clusters.get(i) + 1));
        }
    }
}