#include #include #include "kmp.h" #include "util.h" #include #include #include #define MASTER 0 #define DEFAULT_TAG 17 void find_end(int, char *, char *, int **, int *); void initialize(); void distribute_text(); void distribute_pattern(); void search_for_splitted_pattern(int *residue, int *match_number, int **matches); void calculate_absolute_indices(int match_number, int *matches); void collect_results(int *match_number, int *matches); int rank, size; int text_len; // length of all text int private_text_len; // length of private text int pattern_len; //length of the pattern char *text; char *pattern; int remain = 0; int *text_piece; int *displacements; char *private_text; int *match_numbers; int *total_matches; int total_match_number; int main() { MPI_Init(NULL, NULL); MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &rank); if (rank == MASTER) initialize(); distribute_text(); //printf("%d -> input: %s\n", rank, private_text); //printf("%d -> text len: %d\n", rank, private_text_len); distribute_pattern(); // create and build lps array int lps[pattern_len]; create_lps(pattern, pattern_len, lps); long_long start_time = PAPI_get_real_usec(); // search for matches int match_number = 0; int residue; int *matches = search_pattern(private_text, pattern, lps, &match_number, &residue); search_for_splitted_pattern(&residue, &match_number, &matches); calculate_absolute_indices(match_number, matches); //printf("%d -> result number: %d\n", rank, match_number); collect_results(&match_number, matches); long_long end_time = PAPI_get_real_usec(); MPI_Finalize(); if (rank == MASTER) { printf("total matches: %d\n", total_match_number); printf("matches index: "); print_array(total_matches, total_match_number); printf("elapsed: %d\n", end_time - start_time); } } void find_end(int residue, char *pattern, char *text, int **matches, int *match_number) { int pattern_index = residue; int text_index = 0; int text_len = strlen(text); int pattern_len = strlen(pattern); // TODO: match splitted on 3+ node if (pattern_len - residue > text_len) return; while (pattern_index < pattern_len && pattern[pattern_index] == text[text_index]) { pattern_index++; text_index++; } if (pattern_index != pattern_len) return; *match_number = *match_number + 1; *matches = (int *) realloc(*matches, *match_number * sizeof(int)); (*matches)[*match_number - 1] = -residue; } void initialize() { text = read_file("data/dante.txt", &text_len); pattern = read_file("data/pattern.txt", &pattern_len); //printf("text: %s\n", text); //printf("pattern: %s\n", pattern); private_text_len = text_len / size; remain = text_len % size; if (pattern_len > private_text_len) MPI_Abort(MPI_COMM_WORLD, 1); text_piece = (int *) malloc(sizeof(int) * size); displacements = (int *) malloc(sizeof(int) * size); displacements[0] = 0; for (int i = 0; i < size - 1; ++i) { text_piece[i] = private_text_len; displacements[i + 1] = displacements[i] + private_text_len; } text_piece[size - 1] = private_text_len + remain; } void distribute_text() { // distribution of the length of the text portion to each process MPI_Scatter(text_piece, 1, MPI_INT, &private_text_len, 1, MPI_INT, MASTER, MPI_COMM_WORLD); // allocation of space for text private_text = (char *) malloc(sizeof(char) * (private_text_len + 1)); // distribution of the text portion to each process // scatterv is necessary because the last process receives a larger portion of text in case it is not perfectly divisible. MPI_Scatterv( text, text_piece, displacements, MPI_CHAR, private_text, private_text_len, MPI_CHAR, MASTER, MPI_COMM_WORLD ); private_text[private_text_len] = '\0'; } void distribute_pattern() { // distribution of the length of the pattern to each process MPI_Bcast(&pattern_len, 1, MPI_INT, MASTER, MPI_COMM_WORLD); // allocation of space for the pattern // the master has already done so previously if (rank != MASTER) { pattern = (char *) malloc(sizeof(char) * (pattern_len + 1)); } // distribution of the pattern to each process MPI_Bcast(pattern, pattern_len + 1, MPI_CHAR, MASTER, MPI_COMM_WORLD); } void search_for_splitted_pattern(int *residue, int *match_number, int **matches) { // send the residue to the next process if (rank + 1 != size) { MPI_Send(residue, 1, MPI_INT, rank + 1, DEFAULT_TAG, MPI_COMM_WORLD); } // receiving the residue from the previous process if (rank != 0) { MPI_Recv(residue, 1, MPI_INT, rank - 1, DEFAULT_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); if (*residue != 0) // search for a split match find_end(*residue, pattern, private_text, matches, match_number); } } void calculate_absolute_indices(int match_number, int *matches) { // sending the remain to the last process // this is necessary for the calculation of shift amount if (rank == MASTER) MPI_Send(&remain, 1, MPI_INT, size - 1, DEFAULT_TAG, MPI_COMM_WORLD); else if (rank == size - 1) MPI_Recv(&remain, 1, MPI_INT, MASTER, DEFAULT_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); // transformation of match indices from relative to absolute position. int shift = rank * (private_text_len - remain); // the rest is zero for all but the last process apply_shift(shift, matches, match_number); } void collect_results(int *match_number, int *matches) { if (rank == MASTER) { // allocation for the array containing the number of matches of each process match_numbers = (int *) malloc(sizeof(int) * size); } // collection of the number of matches from each process MPI_Gather(match_number, 1, MPI_INT, match_numbers, 1, MPI_INT, MASTER, MPI_COMM_WORLD); if (rank == MASTER) { // preparation of the data structures needed to receive match indices from all processes total_match_number = sum_array(match_numbers, size); total_matches = (int *) malloc(sizeof(int) * total_match_number); displacements[0] = 0; for (int i = 0; i < size - 1; ++i) { displacements[i + 1] = displacements[i] + match_numbers[i]; } } // collection of match indices from each process // gatherv is necessary because each process will have a different number of matches MPI_Gatherv( matches, *match_number, MPI_INT, total_matches, match_numbers, displacements, MPI_INT, MASTER, MPI_COMM_WORLD ); }