Disjoint Set Union (Union-Find)
Union & find operations, path compression, union by rank, and cycle detection
Master the Disjoint Set Union data structure for efficient union and find operations on disjoint sets.
DSU Fundamentals
What is Disjoint Set Union?
Disjoint Set Union (DSU) is a data structure that efficiently supports union and find operations on disjoint sets. It's also known as Union-Find and is used for problems involving connected components, cycle detection, and dynamic connectivity.
Basic Structure
class DisjointSetUnion {
private:
vector<int> parent;
vector<int> rank;
int n;
public:
DisjointSetUnion(int size) {
n = size;
parent.resize(n);
rank.resize(n, 0);
// Initialize each element as its own parent
for (int i = 0; i < n; i++) {
parent[i] = i;
}
}
// Find root of element x
int find(int x) {
if (parent[x] != x) {
parent[x] = find(parent[x]); // Path compression
}
return parent[x];
}
// Union two sets
void unionSets(int x, int y) {
int rootX = find(x);
int rootY = find(y);
if (rootX == rootY) return; // Already in same set
// Union by rank
if (rank[rootX] < rank[rootY]) {
parent[rootX] = rootY;
} else if (rank[rootX] > rank[rootY]) {
parent[rootY] = rootX;
} else {
parent[rootY] = rootX;
rank[rootX]++;
}
}
// Check if two elements are in same set
bool connected(int x, int y) {
return find(x) == find(y);
}
// Get number of connected components
int getComponentCount() {
int count = 0;
for (int i = 0; i < n; i++) {
if (parent[i] == i) {
count++;
}
}
return count;
}
};
Union & Find Operations
Basic Find Operation
// Find root without path compression
int findBasic(int x) {
while (parent[x] != x) {
x = parent[x];
}
return x;
}
// Find root with path compression
int find(int x) {
if (parent[x] != x) {
parent[x] = find(parent[x]); // Path compression
}
return parent[x];
}
Basic Union Operation
// Union without optimization
void unionBasic(int x, int y) {
int rootX = find(x);
int rootY = find(y);
if (rootX != rootY) {
parent[rootX] = rootY;
}
}
// Union with rank optimization
void unionSets(int x, int y) {
int rootX = find(x);
int rootY = find(y);
if (rootX == rootY) return;
if (rank[rootX] < rank[rootY]) {
parent[rootX] = rootY;
} else if (rank[rootX] > rank[rootY]) {
parent[rootY] = rootX;
} else {
parent[rootY] = rootX;
rank[rootX]++;
}
}
Path Compression
Path Compression Implementation
// Path compression during find
int findWithPathCompression(int x) {
if (parent[x] != x) {
parent[x] = findWithPathCompression(parent[x]);
}
return parent[x];
}
// Iterative path compression
int findIterative(int x) {
int root = x;
while (parent[root] != root) {
root = parent[root];
}
// Path compression
while (parent[x] != root) {
int next = parent[x];
parent[x] = root;
x = next;
}
return root;
}
Benefits of Path Compression
- Reduces tree height: Flattens the tree structure
- Improves future queries: Makes subsequent finds faster
- Amortized O(α(n)): Where α is the inverse Ackermann function
- Nearly constant time: For practical purposes
Union by Rank
Rank-based Union
// Union by rank
void unionByRank(int x, int y) {
int rootX = find(x);
int rootY = find(y);
if (rootX == rootY) return;
if (rank[rootX] < rank[rootY]) {
parent[rootX] = rootY;
} else if (rank[rootX] > rank[rootY]) {
parent[rootY] = rootX;
} else {
parent[rootY] = rootX;
rank[rootX]++;
}
}
Union by Size
// Union by size
void unionBySize(int x, int y) {
int rootX = find(x);
int rootY = find(y);
if (rootX == rootY) return;
if (size[rootX] < size[rootY]) {
parent[rootX] = rootY;
size[rootY] += size[rootX];
} else {
parent[rootY] = rootX;
size[rootX] += size[rootY];
}
}
Connected Components
Count Connected Components
// Count number of connected components
int countConnectedComponents(vector<vector<int>>& edges, int n) {
DisjointSetUnion dsu(n);
for (auto& edge : edges) {
dsu.unionSets(edge[0], edge[1]);
}
return dsu.getComponentCount();
}
Find Connected Components
// Get all connected components
vector<vector<int>> getConnectedComponents(vector<vector<int>>& edges, int n) {
DisjointSetUnion dsu(n);
for (auto& edge : edges) {
dsu.unionSets(edge[0], edge[1]);
}
// Group nodes by their root
unordered_map<int, vector<int>> components;
for (int i = 0; i < n; i++) {
int root = dsu.find(i);
components[root].push_back(i);
}
vector<vector<int>> result;
for (auto& [root, nodes] : components) {
result.push_back(nodes);
}
return result;
}
Cycle Detection
Detect Cycle in Undirected Graph
// Detect cycle in undirected graph
bool hasCycle(vector<vector<int>>& edges, int n) {
DisjointSetUnion dsu(n);
for (auto& edge : edges) {
int u = edge[0];
int v = edge[1];
if (dsu.connected(u, v)) {
return true; // Cycle detected
}
dsu.unionSets(u, v);
}
return false;
}
Detect Cycle in Directed Graph
// Detect cycle in directed graph using DSU
bool hasCycleDirected(vector<vector<int>>& edges, int n) {
DisjointSetUnion dsu(n);
for (auto& edge : edges) {
int u = edge[0];
int v = edge[1];
if (dsu.connected(u, v)) {
return true; // Cycle detected
}
dsu.unionSets(u, v);
}
return false;
}
Advanced Applications
Minimum Spanning Tree (Kruskal's Algorithm)
// Kruskal's algorithm using DSU
int kruskalMST(vector<vector<int>>& edges, int n) {
// Sort edges by weight
sort(edges.begin(), edges.end(), [](const vector<int>& a, const vector<int>& b) {
return a[2] < b[2]; // Assuming weight is at index 2
});
DisjointSetUnion dsu(n);
int mstWeight = 0;
int edgesUsed = 0;
for (auto& edge : edges) {
int u = edge[0];
int v = edge[1];
int weight = edge[2];
if (!dsu.connected(u, v)) {
dsu.unionSets(u, v);
mstWeight += weight;
edgesUsed++;
if (edgesUsed == n - 1) {
break; // MST complete
}
}
}
return mstWeight;
}
Number of Islands
// Count number of islands using DSU
int numIslands(vector<vector<char>>& grid) {
if (grid.empty()) return 0;
int m = grid.size();
int n = grid[0].size();
DisjointSetUnion dsu(m * n);
int islands = 0;
for (int i = 0; i < m; i++) {
for (int j = 0; j < n; j++) {
if (grid[i][j] == '1') {
islands++;
// Check adjacent cells
int directions[4][2] = {{0, 1}, {1, 0}, {0, -1}, {-1, 0}};
for (auto& dir : directions) {
int ni = i + dir[0];
int nj = j + dir[1];
if (ni >= 0 && ni < m && nj >= 0 && nj < n && grid[ni][nj] == '1') {
int current = i * n + j;
int neighbor = ni * n + nj;
if (!dsu.connected(current, neighbor)) {
dsu.unionSets(current, neighbor);
islands--;
}
}
}
}
}
}
return islands;
}
Redundant Connection
// Find redundant connection in graph
vector<int> findRedundantConnection(vector<vector<int>>& edges) {
int n = edges.size();
DisjointSetUnion dsu(n + 1);
for (auto& edge : edges) {
int u = edge[0];
int v = edge[1];
if (dsu.connected(u, v)) {
return edge; // Redundant edge found
}
dsu.unionSets(u, v);
}
return {}; // No redundant edge
}
Performance Analysis
Time Complexity
- Find: O(α(n)) with path compression
- Union: O(α(n)) with union by rank
- Connected: O(α(n))
- α(n): Inverse Ackermann function (nearly constant)
Space Complexity
- O(n): For parent and rank arrays
- O(n): Additional space for size array if needed
Amortized Analysis
- Without optimizations: O(log n) per operation
- With path compression: O(α(n)) per operation
- With union by rank: O(α(n)) per operation
- α(n) < 5: For all practical values of n
Common Patterns
- Connected components in graphs
- Cycle detection in undirected graphs
- Minimum spanning tree algorithms
- Dynamic connectivity problems
- Union-find in graph algorithms
Applications
- Graph algorithms: Connected components, MST, cycle detection
- Network connectivity: Check if nodes are connected, route optimization
- Image processing: Connected component labeling, blob detection
- Social networks: Friend groups and communities, influence mapping
- Competitive programming: Efficient union-find operations
- Bioinformatics: DNA sequence clustering
- Electrical circuits: Finding connected components
Real-World Example: Social Network Friends
// Find groups of friends (connected components)
class SocialNetwork {
DisjointSetUnion friendships;
public:
void addFriendship(int user1, int user2) {
friendships.unionSets(user1, user2);
}
bool areFriends(int user1, int user2) {
return friendships.connected(user1, user2);
}
int getComponentSize(int user) {
// All users in same friend group
int root = friendships.find(user);
int count = 0;
for (int i = 0; i < totalUsers; i++) {
if (friendships.find(i) == root) count++;
}
return count;
}
};
// Usage
SocialNetwork network(1000);
network.addFriendship(0, 1); // 0 and 1 are friends
network.addFriendship(1, 2); // 1 and 2 are friends
network.addFriendship(3, 4); // 3 and 4 are friends
network.areFriends(0, 2); // true (0-1-2 connected)
network.areFriendship(0, 4); // false (different groups)
Performance Optimization Tips
- Path Compression: Always implement, nearly O(1) amortized
- Union by Rank: Keeps tree shallow, critical for performance
- Avoid Union by Size: Rank is more reliable
- Iterate Carefully: Don't create 10M elements for 1000 nodes
- Profile: Measure whether DSU is actually your bottleneck
When to Use DSU
- Dynamic connectivity problems (online union/find queries)
- Cycle detection in graphs (linear time, no DFS needed)
- Connected components analysis (faster than DFS for sparse graphs)
- Minimum spanning tree algorithms (Kruskal's is simple with DSU)
- Union-find operations are frequent and performance-critical