From 95cfa8c1e300026a9d21767f3dfd9bc2d37baaf5 Mon Sep 17 00:00:00 2001 From: norangebit Date: Sat, 27 Mar 2021 14:28:35 +0100 Subject: [PATCH] Each process reads its own portion of the text file --- src/parallel.c | 91 +++----------------------------------------------- src/util.c | 8 +++-- src/util.h | 2 +- 3 files changed, 11 insertions(+), 90 deletions(-) diff --git a/src/parallel.c b/src/parallel.c index 9e7b832..81ed393 100644 --- a/src/parallel.c +++ b/src/parallel.c @@ -14,12 +14,6 @@ void find_end(int, char *, char *, int **, int *); -void initialize(char *); - -void distribute_text(); - -void distribute_pattern(); - void search_for_splitted_pattern(int *residue, int *match_number, int **matches); void calculate_absolute_indices(int match_number, int *matches); @@ -49,23 +43,16 @@ int main(int argc, char **argv) { #ifdef TIME long_long start_time = PAPI_get_real_usec(); #endif - if (rank == MASTER) { - char *text_file_path = get_text_file(argc, argv); - initialize(text_file_path); - } - distribute_text(); - - if (rank == MASTER) - free(text); + char *text_file_path = get_text_file(argc, argv); + private_text = read_file_portion(text_file_path, size, rank, &private_text_len, &remain); + pattern = read_file("data/pattern.txt", &pattern_len); #ifdef LOG printf("%d -> input: %s\n", rank, private_text); printf("%d -> text len: %d\n", rank, private_text_len); #endif - distribute_pattern(); - // create and build lps array int lps[pattern_len]; create_lps(pattern, pattern_len, lps); @@ -80,7 +67,7 @@ int main(int argc, char **argv) { search_for_splitted_pattern(&residue, &match_number, &matches); - free(private_text); + //free(private_text); calculate_absolute_indices(match_number, matches); @@ -116,7 +103,6 @@ void find_end(int residue, char *pattern, char *text, int **matches, int *match_ int text_len = strlen(text); int pattern_len = strlen(pattern); - // TODO: match splitted on 3+ node if (pattern_len - residue > text_len) return; while (pattern_index < pattern_len && 
pattern[pattern_index] == text[text_index]) { @@ -131,67 +117,6 @@ void find_end(int residue, char *pattern, char *text, int **matches, int *match_ (*matches)[*match_number - 1] = -residue; } -void initialize(char *text_file_path) { - text = read_file(text_file_path, &text_len); - pattern = read_file("data/pattern.txt", &pattern_len); - - //printf("text: %s\n", text); - //printf("pattern: %s\n", pattern); - - private_text_len = text_len / size; - remain = text_len % size; - - if (pattern_len > private_text_len) - MPI_Abort(MPI_COMM_WORLD, 1); - - text_piece = (int *) malloc(sizeof(int) * size); - displacements = (int *) malloc(sizeof(int) * size); - - displacements[0] = 0; - for (int i = 0; i < size - 1; ++i) { - text_piece[i] = private_text_len; - displacements[i + 1] = displacements[i] + private_text_len; - } - - text_piece[size - 1] = private_text_len + remain; -} - -void distribute_text() { - // distribution of the length of the text portion to each process - MPI_Scatter(text_piece, 1, MPI_INT, &private_text_len, 1, MPI_INT, MASTER, MPI_COMM_WORLD); - - // allocation of space for text - private_text = (char *) malloc(sizeof(char) * (private_text_len + 1)); - - // distribution of the text portion to each process - // scatterv is necessary because the last process receives a larger portion of text in case it is not perfectly divisible. 
- MPI_Scatterv( - text, - text_piece, - displacements, - MPI_CHAR, - private_text, - private_text_len, - MPI_CHAR, MASTER, - MPI_COMM_WORLD - ); - private_text[private_text_len] = '\0'; -} - -void distribute_pattern() { - // distribution of the length of the pattern to each process - MPI_Bcast(&pattern_len, 1, MPI_INT, MASTER, MPI_COMM_WORLD); - - // allocation of space for the pattern - // the master has already done so previously - if (rank != MASTER) { - pattern = (char *) malloc(sizeof(char) * (pattern_len + 1)); - } - - // distribution of the pattern to each process - MPI_Bcast(pattern, pattern_len + 1, MPI_CHAR, MASTER, MPI_COMM_WORLD); -} - void search_for_splitted_pattern(int *residue, int *match_number, int **matches) { // send the residue to the next process if (rank + 1 != size) { @@ -209,13 +134,6 @@ void search_for_splitted_pattern(int *residue, int *match_number, int **matches) } void calculate_absolute_indices(int match_number, int *matches) { - // sending the remain to the last process - // this is necessary for the calculation of shift amount - if (rank == MASTER) - MPI_Send(&remain, 1, MPI_INT, size - 1, DEFAULT_TAG, MPI_COMM_WORLD); - else if (rank == size - 1) - MPI_Recv(&remain, 1, MPI_INT, MASTER, DEFAULT_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - // transformation of match indices from relative to absolute position. 
int shift = rank * (private_text_len - remain); // the rest is zero for all but the last process apply_shift(shift, matches, match_number); @@ -235,6 +153,7 @@ void collect_results(int *match_number, int *matches) { total_match_number = sum_array(match_numbers, size); total_matches = (int *) malloc(sizeof(int) * total_match_number); + displacements = (int *) malloc(sizeof(int) * size); displacements[0] = 0; for (int i = 0; i < size - 1; ++i) { displacements[i + 1] = displacements[i] + match_numbers[i]; diff --git a/src/util.c b/src/util.c index bc8cd6d..1fbff40 100644 --- a/src/util.c +++ b/src/util.c @@ -54,7 +54,7 @@ char *read_file(char *filepath, int *len) { return content; } -char *read_file_portion(char *filepath, int size, int rank, int *portion_len) { +char *read_file_portion(char *filepath, int size, int rank, int *portion_len, int *remain) { FILE *f = fopen(filepath, "r"); fseek(f, 0, SEEK_END); int total_len = ftell(f) - 1; @@ -63,8 +63,10 @@ char *read_file_portion(char *filepath, int size, int rank, int *portion_len) { int start_point = rank * len; int end_point = start_point + len; + if (rank + 1 == size) - end_point += total_len % size; + *remain = total_len % size; + end_point += *remain; *portion_len = end_point - start_point; char *content = (char *) malloc(sizeof(char) * (*portion_len)); @@ -75,5 +77,5 @@ char *read_file_portion(char *filepath, int size, int rank, int *portion_len) { content[*portion_len] = '\0'; - return NULL; + return content; } \ No newline at end of file diff --git a/src/util.h b/src/util.h index 275df53..e90b564 100644 --- a/src/util.h +++ b/src/util.h @@ -11,6 +11,6 @@ int sum_array(int *, int); char *get_text_file(int argc, char **argv); -char *read_file_portion(char *filepath, int size, int rank, int *portion_len); +char *read_file_portion(char *filepath, int size, int rank, int *portion_len, int *remain); #endif //KMP_UTIL_H