721. Accounts Merge
Given a list of accounts where each element accounts[i] is a list of strings, where the first element accounts[i][0] is a name, and the rest of the elements are emails representing emails of the account.
Now, we would like to merge these accounts. Two accounts definitely belong to the same person if there is some common email to both accounts. Note that even if two accounts have the same name, they may belong to different people as people could have the same name. A person can have any number of accounts initially, but all of their accounts definitely have the same name.
After merging the accounts, return the accounts in the following format: the first element of each account is the name, and the rest of the elements are emails in sorted order. The accounts themselves can be returned in any order.
Example 1:
Input: accounts = [[“John”,“johnsmith@mail.com”,“john_newyork@mail.com”],[“John”,“johnsmith@mail.com”,“john00@mail.com”],[“Mary”,“mary@mail.com”],[“John”,“johnnybravo@mail.com”]]
Output: [[“John”,“john00@mail.com”,“john_newyork@mail.com”,“johnsmith@mail.com”],[“Mary”,“mary@mail.com”],[“John”,“johnnybravo@mail.com”]]
Explanation: The first and second Johns are the same person because they share the email “johnsmith@mail.com”.
The third John and Mary are different people as none of their email addresses are used by other accounts.
We could return these lists in any order, for example the answer [[‘Mary’, ‘mary@mail.com’], [‘John’, ‘johnnybravo@mail.com’],
[‘John’, ‘john00@mail.com’, ‘john_newyork@mail.com’, ‘johnsmith@mail.com’]] would still be accepted.
Example 2:
Input: accounts = [[“Gabe”,“Gabe0@m.co”,“Gabe3@m.co”,“Gabe1@m.co”],[“Kevin”,“Kevin3@m.co”,“Kevin5@m.co”,“Kevin0@m.co”],[“Ethan”,“Ethan5@m.co”,“Ethan4@m.co”,“Ethan0@m.co”],[“Hanzo”,“Hanzo3@m.co”,“Hanzo1@m.co”,“Hanzo0@m.co”],[“Fern”,“Fern5@m.co”,“Fern1@m.co”,“Fern0@m.co”]]
Output: [[“Ethan”,“Ethan0@m.co”,“Ethan4@m.co”,“Ethan5@m.co”],[“Gabe”,“Gabe0@m.co”,“Gabe1@m.co”,“Gabe3@m.co”],[“Hanzo”,“Hanzo0@m.co”,“Hanzo1@m.co”,“Hanzo3@m.co”],[“Kevin”,“Kevin0@m.co”,“Kevin3@m.co”,“Kevin5@m.co”],[“Fern”,“Fern0@m.co”,“Fern1@m.co”,“Fern5@m.co”]]
Constraints:
- 1 <= accounts.length <= 1000
- 2 <= accounts[i].length <= 10
- 1 <= accounts[i][j].length <= 30
- accounts[i][0] consists of English letters.
- accounts[i][j] (for j > 0) is a valid email.
From: LeetCode
Link: 721. Accounts Merge
Solution:
Ideas:
1. Build Graph:
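- For each account, link every email to the account's first email with a pair of edges (one in each direction), so all emails of the account end up in one connected component.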
- Ensure even single (disconnected) emails are added as nodes.
2. Map Emails to Names:
- For each email in all accounts, map it to the corresponding name.
3. Traverse with DFS:
- For each unvisited email node in the graph:
- Start DFS and collect all reachable emails (i.e., connected component).
- Store collected emails in a dynamic list.
- Sort emails alphabetically.
4. Format Output:
- Retrieve the name by looking up the first email of the sorted component in the email-to-name map.
- Create [name, sorted_emails…] array and add to final result.
Code:
/* One cell of a singly linked adjacency list: the email at the far end of an
 * edge plus a link to the next cell. */
typedef struct EmailList {
    char *email;
    struct EmailList *next;
} EmailList;

/* Hash-table entry keyed by `email`. Used both as a graph vertex (with its
 * adjacency list in `neighbors`) and as a marker in the visited set, where
 * `neighbors` is left NULL. */
typedef struct EmailNode {
    char *email;
    EmailList *neighbors;
    UT_hash_handle hh; /* uthash bookkeeping for this entry */
} EmailNode;

/* Hash-table entry keyed by `email` that records the account name under
 * which the email was seen. */
typedef struct EmailToName {
    char *email;
    char *name;
    UT_hash_handle hh; /* uthash bookkeeping for this entry */
} EmailToName;
/**
 * Record a directed edge from -> to in the adjacency graph.
 *
 * The vertex for `from` is created on demand (with its own copy of the key
 * string). The edge is pushed onto the front of that vertex's neighbor list
 * and stores its own copy of `to`. No vertex is created for `to`, and
 * duplicate edges are not filtered out.
 *
 * @param graph address of the uthash head pointer for the graph
 * @param from  source email
 * @param to    destination email
 */
void addEdge(EmailNode **graph, const char *from, const char *to) {
    EmailNode *source = NULL;
    HASH_FIND_STR(*graph, from, source);

    if (source == NULL) {
        source = malloc(sizeof *source);
        source->email = strdup(from);
        source->neighbors = NULL;
        HASH_ADD_KEYPTR(hh, *graph, source->email, strlen(source->email), source);
    }

    /* Prepend the new edge; list order is irrelevant to the traversal. */
    EmailList *link = malloc(sizeof *link);
    link->email = strdup(to);
    link->next = source->neighbors;
    source->neighbors = link;
}
/**
 * Append `email` to the growable pointer array `*emails`.
 *
 * When the array is full its capacity is doubled; a capacity of zero (for
 * example a NULL, not-yet-allocated buffer) grows to one slot, since doubling
 * zero would never make room. Only the pointer is stored; the string itself
 * is not copied.
 *
 * @param emails   address of the array pointer; updated if the buffer moves
 * @param count    number of used slots; incremented on return
 * @param capacity number of allocated slots; updated when the buffer grows
 * @param email    pointer to store in the next free slot
 *
 * The function has no way to report failure, so it calls abort() if the
 * buffer cannot be grown rather than writing through a NULL pointer.
 */
void safe_add_email(char ***emails, int *count, int *capacity, char *email) {
    if (*count >= *capacity) {
        int grown = (*capacity > 0) ? *capacity * 2 : 1;
        if (grown <= *count) {
            /* Defensive: make sure there is room for at least one more slot. */
            grown = *count + 1;
        }

        /* Keep the old pointer until realloc is known to have succeeded. */
        char **resized = realloc(*emails, sizeof(char *) * (size_t)grown);
        if (resized == NULL) {
            abort();
        }
        *emails = resized;
        *capacity = grown;
    }
    (*emails)[(*count)++] = email;
}
/**
 * Depth-first walk that collects every email reachable from `node`.
 *
 * Each newly reached vertex is recorded in the `visited` hash (the marker
 * entry borrows the vertex's key string rather than copying it) and its
 * email pointer is appended to the `emails` buffer. Vertices already present
 * in `visited` are skipped.
 *
 * @param node     vertex to start from
 * @param graph    address of the graph's hash head, used to resolve neighbors
 * @param visited  address of the visited-set hash head
 * @param emails   growable output array of email pointers
 * @param count    number of entries currently in `*emails`
 * @param capacity allocated size of `*emails`
 */
void dfs(EmailNode *node, EmailNode **graph, EmailNode **visited, char ***emails, int *count, int *capacity) {
    EmailNode *seen = NULL;
    HASH_FIND_STR(*visited, node->email, seen);
    if (seen != NULL) {
        return;
    }

    /* Mark before descending so that cycles terminate. */
    EmailNode *marker = malloc(sizeof *marker);
    marker->email = node->email;
    marker->neighbors = NULL;
    HASH_ADD_KEYPTR(hh, *visited, marker->email, strlen(marker->email), marker);

    safe_add_email(emails, count, capacity, node->email);

    EmailList *edge = node->neighbors;
    while (edge != NULL) {
        EmailNode *target = NULL;
        HASH_FIND_STR(*graph, edge->email, target);
        if (target != NULL) {
            dfs(target, graph, visited, emails, count, capacity);
        }
        edge = edge->next;
    }
}
/* qsort comparator for an array of C-string pointers: orders the strings by
 * strcmp, i.e. byte-wise ascending. */
int cmpstr(const void *a, const void *b) {
    const char *lhs = *(const char *const *)a;
    const char *rhs = *(const char *const *)b;
    return strcmp(lhs, rhs);
}
/* Free every vertex of the graph: its key string, its neighbor cells and the
 * email copy held by each cell. */
static void freeGraph(EmailNode **graph) {
    EmailNode *node, *tmp;
    HASH_ITER(hh, *graph, node, tmp) {
        HASH_DEL(*graph, node);
        EmailList *cell = node->neighbors;
        while (cell != NULL) {
            EmailList *following = cell->next;
            free(cell->email);
            free(cell);
            cell = following;
        }
        free(node->email);
        free(node);
    }
}

/* Free the visited-set markers. Their `email` pointers are borrowed from the
 * graph vertices (see dfs), so only the marker structs are released. */
static void freeVisited(EmailNode **visited) {
    EmailNode *marker, *tmp;
    HASH_ITER(hh, *visited, marker, tmp) {
        HASH_DEL(*visited, marker);
        free(marker);
    }
}

/* Free the email -> name map. Keys are owned copies; names are borrowed from
 * the input accounts and therefore not freed here. */
static void freeEmailToName(EmailToName **table) {
    EmailToName *entry, *tmp;
    HASH_ITER(hh, *table, entry, tmp) {
        HASH_DEL(*table, entry);
        free(entry->email);
        free(entry);
    }
}

/**
 * Merge accounts that share at least one email address.
 *
 * Every email is a graph vertex; within one account each email is linked to
 * the account's first email, so emails of accounts that overlap fall into the
 * same connected component. Each component becomes one output row:
 * [name, email_1, ..., email_k] with the emails sorted by strcmp.
 *
 * @param accounts          accounts[i][0] is the name, the rest are emails
 * @param accountsSize      number of accounts
 * @param accountsColSize   accountsColSize[i] is the length of accounts[i]
 * @param returnSize        out: number of merged rows
 * @param returnColumnSizes out: malloc'ed array with the length of each row
 * @return malloc'ed array of malloc'ed rows. Every string in a row is an
 *         independent heap copy, so the caller may free each string, each
 *         row, the outer array and *returnColumnSizes without touching the
 *         input.
 *
 * All internal scratch structures (graph, visited set, email -> name map) are
 * released before returning. Allocation failures are not checked, in line
 * with the helpers this function relies on.
 */
char*** accountsMerge(char*** accounts, int accountsSize, int* accountsColSize, int* returnSize, int** returnColumnSizes) {
    EmailNode *graph = NULL;
    EmailToName *emailToName = NULL;
    EmailNode *visited = NULL;

    // Step 1: Build graph and map emails to names
    for (int i = 0; i < accountsSize; i++) {
        for (int j = 1; j < accountsColSize[i]; j++) {
            // Map email to name (first account that mentions it wins)
            EmailToName *entry = NULL;
            HASH_FIND_STR(emailToName, accounts[i][j], entry);
            if (!entry) {
                entry = malloc(sizeof *entry);
                entry->email = strdup(accounts[i][j]);
                // Borrowed: the input outlives this map, so no copy is needed.
                entry->name = accounts[i][0];
                HASH_ADD_KEYPTR(hh, emailToName, entry->email, strlen(entry->email), entry);
            }

            if (j > 1) {
                // Link to the account's first email in both directions
                addEdge(&graph, accounts[i][1], accounts[i][j]);
                addEdge(&graph, accounts[i][j], accounts[i][1]);
            } else {
                // Ensure the vertex exists even if the account has one email
                EmailNode *vertex = NULL;
                HASH_FIND_STR(graph, accounts[i][j], vertex);
                if (!vertex) {
                    vertex = malloc(sizeof *vertex);
                    vertex->email = strdup(accounts[i][j]);
                    vertex->neighbors = NULL;
                    HASH_ADD_KEYPTR(hh, graph, vertex->email, strlen(vertex->email), vertex);
                }
            }
        }
    }

    // Step 2: one DFS per connected component.
    // A component contains at least one account's emails, so there can never
    // be more components than accounts. Allocate at least one slot so an
    // empty input does not depend on malloc(0) behaviour.
    size_t slots = accountsSize > 0 ? (size_t)accountsSize : 1;
    char ***res = malloc(sizeof *res * slots);
    *returnColumnSizes = malloc(sizeof **returnColumnSizes * slots);
    *returnSize = 0;

    EmailNode *node, *tmp;
    HASH_ITER(hh, graph, node, tmp) {
        EmailNode *found = NULL;
        HASH_FIND_STR(visited, node->email, found);
        if (found) {
            continue;
        }

        int capacity = 16;
        int count = 0;
        char **emails = malloc(sizeof *emails * (size_t)capacity);
        dfs(node, &graph, &visited, &emails, &count, &capacity);
        qsort(emails, (size_t)count, sizeof *emails, cmpstr);

        // Every graph vertex was registered in emailToName during step 1.
        EmailToName *owner = NULL;
        HASH_FIND_STR(emailToName, emails[0], owner);

        // Copy the strings: `emails` points into the graph, which is freed
        // below, and the name belongs to the caller's input.
        char **row = malloc(sizeof *row * (size_t)(count + 1));
        row[0] = strdup(owner->name);
        for (int k = 0; k < count; k++) {
            row[k + 1] = strdup(emails[k]);
        }

        res[*returnSize] = row;
        (*returnColumnSizes)[*returnSize] = count + 1;
        (*returnSize)++;
        free(emails);
    }

    // Step 3: release scratch structures. The visited markers borrow key
    // strings from the graph, so they go first.
    freeVisited(&visited);
    freeEmailToName(&emailToName);
    freeGraph(&graph);

    return res;
}