From c2baf5fa0524cf0c159805c3b2419f9d5cf0bbac Mon Sep 17 00:00:00 2001
From: norangebit
Date: Sat, 27 Mar 2021 12:32:38 +0100
Subject: [PATCH] Add make file

---
Review notes (placed below the "---" marker, so `git am` ignores them):

This copy of the patch was re-flowed from a whitespace-collapsed extraction.
Header names inside <...> and blank context lines were lost; they have been
restored from the symbols each file uses and from the hunk line counts.
Confirm them against the repository before applying. The blob hashes on the
"index" lines still refer to the original commit.

Changes relative to the original commit:
 * makefile: parallel-time now writes build/kmp-pt.out (it used to overwrite
   build/kmp-p.out, the output of the plain parallel target), and both TIME
   targets link -lpapi, which PAPI_get_real_usec() requires.
 * src/parallel.c, src/sequential.c: elapsed times are long_long values and
   are now printed with %lld instead of %d.
 * src/parallel.c: calculate_absolute_indices() skips the remain exchange
   when only one process runs; rank 0 would otherwise send to itself with no
   matching receive.

 makefile                         |  18 +++
 kmp.c => src/kmp.c               |   0
 kmp.h => src/kmp.h               |   0
 src/parallel.c                   | 256 +++++++++++++++++++++++++++++++
 sequential.c => src/sequential.c |  22 ++-
 util.c => src/util.c             |   7 +
 util.h => src/util.h             |   2 +
 7 files changed, 300 insertions(+), 5 deletions(-)
 create mode 100644 makefile
 rename kmp.c => src/kmp.c (100%)
 rename kmp.h => src/kmp.h (100%)
 create mode 100644 src/parallel.c
 rename sequential.c => src/sequential.c (75%)
 rename util.c => src/util.c (86%)
 rename util.h => src/util.h (81%)

diff --git a/makefile b/makefile
new file mode 100644
index 0000000..49f7761
--- /dev/null
+++ b/makefile
@@ -0,0 +1,18 @@
+sequential: src/sequential.c src/util.c src/kmp.c build
+	gcc src/kmp.c src/util.c src/sequential.c -o build/kmp-s.out
+
+sequential-time: src/sequential.c src/util.c src/kmp.c build
+	gcc -D TIME src/kmp.c src/util.c src/sequential.c -lpapi -o build/kmp-st.out
+
+parallel: src/parallel.c src/util.c src/kmp.c build
+	mpicc -std=c99 src/parallel.c src/util.c src/kmp.c -lm -o build/kmp-p.out
+
+parallel-time: src/parallel.c src/util.c src/kmp.c build
+	mpicc -std=c99 -D TIME src/parallel.c src/util.c src/kmp.c -lm -lpapi -o build/kmp-pt.out
+
+
+parallel-log: src/parallel.c src/util.c src/kmp.c build
+	mpicc -std=c99 -D LOG src/parallel.c src/util.c src/kmp.c -lm -o build/kmp-pl.out
+
+build:
+	mkdir build
diff --git a/kmp.c b/src/kmp.c
similarity index 100%
rename from kmp.c
rename to src/kmp.c
diff --git a/kmp.h b/src/kmp.h
similarity index 100%
rename from kmp.h
rename to src/kmp.h
diff --git a/src/parallel.c b/src/parallel.c
new file mode 100644
index 0000000..9e7b832
--- /dev/null
+++ b/src/parallel.c
@@ -0,0 +1,256 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "kmp.h"
+#include "util.h"
+#include <string.h>
+#include <mpi.h>
+
+#ifdef TIME
+#include <papi.h>
+#endif
+
+#define MASTER 0
+#define DEFAULT_TAG 17
+
+void find_end(int, char *, char *, int **, int *);
+
+void initialize(char *);
+
+void distribute_text();
+
+void distribute_pattern();
+
+void search_for_splitted_pattern(int *residue, int *match_number, int **matches);
+
+void calculate_absolute_indices(int match_number, int *matches);
+
+void collect_results(int *match_number, int *matches);
+
+int rank, size;
+int text_len; // length of the whole text (set on the master only)
+int private_text_len; // length of the text portion owned by this process
+int pattern_len; // length of the pattern
+char *text;
+char *pattern;
+int remain = 0;
+int *text_piece;
+int *displacements;
+char *private_text;
+int *match_numbers;
+int *total_matches;
+int total_match_number;
+
+int main(int argc, char **argv) {
+    MPI_Init(NULL, NULL);
+
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+#ifdef TIME
+    long_long start_time = PAPI_get_real_usec();
+#endif
+    if (rank == MASTER) {
+        char *text_file_path = get_text_file(argc, argv);
+        initialize(text_file_path);
+    }
+
+    distribute_text();
+
+    if (rank == MASTER)
+        free(text);
+
+#ifdef LOG
+    printf("%d -> input: %s\n", rank, private_text);
+    printf("%d -> text len: %d\n", rank, private_text_len);
+#endif
+
+    distribute_pattern();
+
+    // create and build lps array
+    int lps[pattern_len];
+    create_lps(pattern, pattern_len, lps);
+
+    // search for matches
+    int match_number = 0;
+    int residue;
+#ifdef TIME
+    long_long start_search = PAPI_get_real_usec();
+#endif
+    int *matches = search_pattern(private_text, pattern, lps, &match_number, &residue);
+
+    search_for_splitted_pattern(&residue, &match_number, &matches);
+
+    free(private_text);
+
+    calculate_absolute_indices(match_number, matches);
+
+#ifdef LOG
+    printf("%d -> result number: %d\n", rank, match_number);
+#endif
+
+    collect_results(&match_number, matches);
+
+#ifdef TIME
+    long_long end_time = PAPI_get_real_usec();
+#endif
+
+    MPI_Finalize();
+
+    if (rank == MASTER) {
+        printf("total matches: %d\n", total_match_number);
+#ifdef LOG
+        printf("matches index: ");
+        print_array(total_matches, total_match_number);
+#endif
+
+#ifdef TIME
+        printf("total elapsed: %lld\n", (long long) (end_time - start_time));
+        printf("search elapsed: %lld\n", (long long) (end_time - start_search));
+#endif
+    }
+}
+
+void find_end(int residue, char *pattern, char *text, int **matches, int *match_number) {
+    int pattern_index = residue;
+    int text_index = 0;
+    int text_len = strlen(text);
+    int pattern_len = strlen(pattern);
+
+    // TODO: handle a match split across three or more nodes
+    if (pattern_len - residue > text_len) return;
+
+    while (pattern_index < pattern_len && pattern[pattern_index] == text[text_index]) {
+        pattern_index++;
+        text_index++;
+    }
+
+    if (pattern_index != pattern_len) return;
+
+    *match_number = *match_number + 1;
+    *matches = (int *) realloc(*matches, *match_number * sizeof(int));
+    (*matches)[*match_number - 1] = -residue;
+}
+
+void initialize(char *text_file_path) {
+    text = read_file(text_file_path, &text_len);
+    pattern = read_file("data/pattern.txt", &pattern_len);
+
+    //printf("text: %s\n", text);
+    //printf("pattern: %s\n", pattern);
+
+    private_text_len = text_len / size;
+    remain = text_len % size;
+
+    if (pattern_len > private_text_len)
+        MPI_Abort(MPI_COMM_WORLD, 1);
+
+    text_piece = (int *) malloc(sizeof(int) * size);
+    displacements = (int *) malloc(sizeof(int) * size);
+
+    displacements[0] = 0;
+    for (int i = 0; i < size - 1; ++i) {
+        text_piece[i] = private_text_len;
+        displacements[i + 1] = displacements[i] + private_text_len;
+    }
+
+    text_piece[size - 1] = private_text_len + remain;
+}
+
+void distribute_text() {
+    // send each process the length of its text portion
+    MPI_Scatter(text_piece, 1, MPI_INT, &private_text_len, 1, MPI_INT, MASTER, MPI_COMM_WORLD);
+
+    // allocate space for the text portion plus its terminator
+    private_text = (char *) malloc(sizeof(char) * (private_text_len + 1));
+
+    // send each process its text portion
+    // scatterv is needed because the last process gets a larger portion when the text does not divide evenly
+    MPI_Scatterv(
+            text,
+            text_piece,
+            displacements,
+            MPI_CHAR,
+            private_text,
+            private_text_len,
+            MPI_CHAR, MASTER,
+            MPI_COMM_WORLD
+    );
+    private_text[private_text_len] = '\0';
+}
+
+void distribute_pattern() {
+    // send the length of the pattern to each process
+    MPI_Bcast(&pattern_len, 1, MPI_INT, MASTER, MPI_COMM_WORLD);
+
+    // allocate space for the pattern
+    // the master already holds it (read in initialize)
+    if (rank != MASTER) {
+        pattern = (char *) malloc(sizeof(char) * (pattern_len + 1));
+    }
+
+    // send the pattern, terminator included, to each process
+    MPI_Bcast(pattern, pattern_len + 1, MPI_CHAR, MASTER, MPI_COMM_WORLD);
+}
+
+void search_for_splitted_pattern(int *residue, int *match_number, int **matches) {
+    // send the residue to the next process
+    if (rank + 1 != size) {
+        MPI_Send(residue, 1, MPI_INT, rank + 1, DEFAULT_TAG, MPI_COMM_WORLD);
+    }
+
+    // receive the residue from the previous process
+    if (rank != 0) {
+        MPI_Recv(residue, 1, MPI_INT, rank - 1, DEFAULT_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+        if (*residue != 0)
+            // search for a split match
+            find_end(*residue, pattern, private_text, matches, match_number);
+    }
+
+}
+
+void calculate_absolute_indices(int match_number, int *matches) {
+    // send remain to the last process, which needs it to compute its shift
+    // skipped with a single process: rank 0 already holds remain and must not message itself
+    if (size > 1 && rank == MASTER)
+        MPI_Send(&remain, 1, MPI_INT, size - 1, DEFAULT_TAG, MPI_COMM_WORLD);
+    else if (size > 1 && rank == size - 1)
+        MPI_Recv(&remain, 1, MPI_INT, MASTER, DEFAULT_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+    // convert match indices from relative to absolute positions
+    int shift = rank * (private_text_len - remain); // remain is non-zero only on rank 0 and the last rank
+    apply_shift(shift, matches, match_number);
+}
+
+void collect_results(int *match_number, int *matches) {
+    if (rank == MASTER) {
+        // array holding the number of matches found by each process
+        match_numbers = (int *) malloc(sizeof(int) * size);
+    }
+
+    // collect the number of matches from each process
+    MPI_Gather(match_number, 1, MPI_INT, match_numbers, 1, MPI_INT, MASTER, MPI_COMM_WORLD);
+
+    if (rank == MASTER) {
+        // prepare the buffers needed to receive the match indices from all processes
+        total_match_number = sum_array(match_numbers, size);
+        total_matches = (int *) malloc(sizeof(int) * total_match_number);
+
+        displacements[0] = 0;
+        for (int i = 0; i < size - 1; ++i) {
+            displacements[i + 1] = displacements[i] + match_numbers[i];
+        }
+    }
+
+    // collect the match indices from each process
+    // gatherv is needed because each process may have a different number of matches
+    MPI_Gatherv(
+            matches,
+            *match_number,
+            MPI_INT,
+            total_matches,
+            match_numbers,
+            displacements,
+            MPI_INT, MASTER,
+            MPI_COMM_WORLD
+    );
+}
diff --git a/sequential.c b/src/sequential.c
similarity index 75%
rename from sequential.c
rename to src/sequential.c
index cb36a87..11a9373 100644
--- a/sequential.c
+++ b/src/sequential.c
@@ -1,14 +1,20 @@
 #include "util.h"
 #include "kmp.h"
 #include <stdio.h>
-#include <papi.h>
 #include <stdlib.h>
 
-int main() {
+#ifdef TIME
+#include <papi.h>
+#endif
+
+int main(int argc, char **argv) {
+    char *text_file_path = get_text_file(argc, argv);
     int text_len, pattern_len;
 
+#ifdef TIME
     long_long start_time = PAPI_get_real_usec();
-    char *text = read_file("data/dante.txt", &text_len);
+#endif
+    char *text = read_file(text_file_path, &text_len);
     char *pattern = read_file("data/pattern.txt", &pattern_len);
 
     int lps[pattern_len];
@@ -17,12 +23,18 @@ int main() {
     int match_number = 0;
     int residue = 0;
 
+#ifdef TIME
     long_long start_search = PAPI_get_real_usec();
+#endif
     int *matches = search_pattern(text, pattern, lps, &match_number, &residue);
     free(text);
 
+#ifdef TIME
     long_long end_time = PAPI_get_real_usec();
+#endif
     printf("found %d matches\n", match_number);
+#ifdef TIME
-    printf("total elapsed: %d\n", end_time - start_time);
-    printf("search elapsed: %d\n", end_time - start_search);
+    printf("total elapsed: %lld\n", (long long) (end_time - start_time));
+    printf("search elapsed: %lld\n", (long long) (end_time - start_search));
+#endif
 }
diff --git a/util.c b/src/util.c
similarity index 86%
rename from util.c
rename to src/util.c
index d09bfa6..0f4630a 100644
--- a/util.c
+++ b/src/util.c
@@ -1,6 +1,13 @@
 #include <stdio.h>
 #include <stdlib.h>
 
+char *get_text_file(int argc, char **argv) {
+    if (argc == 1)
+        return "data/text.txt";
+    else
+        return argv[1];
+}
+
 void print_array(int *array, int length) {
     if (length == 0) {
         printf("\n");
diff --git a/util.h b/src/util.h
similarity index 81%
rename from util.h
rename to src/util.h
index 5e40f7d..3dc755c 100644
--- a/util.h
+++ b/src/util.h
@@ -9,4 +9,6 @@ void apply_shift(int, int *, int);
 
 int sum_array(int *, int);
 
+char *get_text_file(int argc, char **argv);
+
 #endif //KMP_UTIL_H