Each process reads its own portion of the file
This commit is contained in:
parent
072ad792f1
commit
95cfa8c1e3
@ -14,12 +14,6 @@
|
|||||||
|
|
||||||
void find_end(int, char *, char *, int **, int *);
|
void find_end(int, char *, char *, int **, int *);
|
||||||
|
|
||||||
void initialize(char *);
|
|
||||||
|
|
||||||
void distribute_text();
|
|
||||||
|
|
||||||
void distribute_pattern();
|
|
||||||
|
|
||||||
void search_for_splitted_pattern(int *residue, int *match_number, int **matches);
|
void search_for_splitted_pattern(int *residue, int *match_number, int **matches);
|
||||||
|
|
||||||
void calculate_absolute_indices(int match_number, int *matches);
|
void calculate_absolute_indices(int match_number, int *matches);
|
||||||
@ -49,23 +43,16 @@ int main(int argc, char **argv) {
|
|||||||
#ifdef TIME
|
#ifdef TIME
|
||||||
long_long start_time = PAPI_get_real_usec();
|
long_long start_time = PAPI_get_real_usec();
|
||||||
#endif
|
#endif
|
||||||
if (rank == MASTER) {
|
|
||||||
char *text_file_path = get_text_file(argc, argv);
|
|
||||||
initialize(text_file_path);
|
|
||||||
}
|
|
||||||
|
|
||||||
distribute_text();
|
char *text_file_path = get_text_file(argc, argv);
|
||||||
|
private_text = read_file_portion(text_file_path, size, rank, &private_text_len, &remain);
|
||||||
if (rank == MASTER)
|
pattern = read_file("data/pattern.txt", &pattern_len);
|
||||||
free(text);
|
|
||||||
|
|
||||||
#ifdef LOG
|
#ifdef LOG
|
||||||
printf("%d -> input: %s\n", rank, private_text);
|
printf("%d -> input: %s\n", rank, private_text);
|
||||||
printf("%d -> text len: %d\n", rank, private_text_len);
|
printf("%d -> text len: %d\n", rank, private_text_len);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
distribute_pattern();
|
|
||||||
|
|
||||||
// create and build lps array
|
// create and build lps array
|
||||||
int lps[pattern_len];
|
int lps[pattern_len];
|
||||||
create_lps(pattern, pattern_len, lps);
|
create_lps(pattern, pattern_len, lps);
|
||||||
@ -80,7 +67,7 @@ int main(int argc, char **argv) {
|
|||||||
|
|
||||||
search_for_splitted_pattern(&residue, &match_number, &matches);
|
search_for_splitted_pattern(&residue, &match_number, &matches);
|
||||||
|
|
||||||
free(private_text);
|
//free(private_text);
|
||||||
|
|
||||||
calculate_absolute_indices(match_number, matches);
|
calculate_absolute_indices(match_number, matches);
|
||||||
|
|
||||||
@ -116,7 +103,6 @@ void find_end(int residue, char *pattern, char *text, int **matches, int *match_
|
|||||||
int text_len = strlen(text);
|
int text_len = strlen(text);
|
||||||
int pattern_len = strlen(pattern);
|
int pattern_len = strlen(pattern);
|
||||||
|
|
||||||
// TODO: match splitted on 3+ node
|
|
||||||
if (pattern_len - residue > text_len) return;
|
if (pattern_len - residue > text_len) return;
|
||||||
|
|
||||||
while (pattern_index < pattern_len && pattern[pattern_index] == text[text_index]) {
|
while (pattern_index < pattern_len && pattern[pattern_index] == text[text_index]) {
|
||||||
@ -131,67 +117,6 @@ void find_end(int residue, char *pattern, char *text, int **matches, int *match_
|
|||||||
(*matches)[*match_number - 1] = -residue;
|
(*matches)[*match_number - 1] = -residue;
|
||||||
}
|
}
|
||||||
|
|
||||||
void initialize(char *text_file_path) {
|
|
||||||
text = read_file(text_file_path, &text_len);
|
|
||||||
pattern = read_file("data/pattern.txt", &pattern_len);
|
|
||||||
|
|
||||||
//printf("text: %s\n", text);
|
|
||||||
//printf("pattern: %s\n", pattern);
|
|
||||||
|
|
||||||
private_text_len = text_len / size;
|
|
||||||
remain = text_len % size;
|
|
||||||
|
|
||||||
if (pattern_len > private_text_len)
|
|
||||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
||||||
|
|
||||||
text_piece = (int *) malloc(sizeof(int) * size);
|
|
||||||
displacements = (int *) malloc(sizeof(int) * size);
|
|
||||||
|
|
||||||
displacements[0] = 0;
|
|
||||||
for (int i = 0; i < size - 1; ++i) {
|
|
||||||
text_piece[i] = private_text_len;
|
|
||||||
displacements[i + 1] = displacements[i] + private_text_len;
|
|
||||||
}
|
|
||||||
|
|
||||||
text_piece[size - 1] = private_text_len + remain;
|
|
||||||
}
|
|
||||||
|
|
||||||
void distribute_text() {
|
|
||||||
// distribution of the length of the text portion to each process
|
|
||||||
MPI_Scatter(text_piece, 1, MPI_INT, &private_text_len, 1, MPI_INT, MASTER, MPI_COMM_WORLD);
|
|
||||||
|
|
||||||
// allocation of space for text
|
|
||||||
private_text = (char *) malloc(sizeof(char) * (private_text_len + 1));
|
|
||||||
|
|
||||||
// distribution of the text portion to each process
|
|
||||||
// scatterv is necessary because the last process receives a larger portion of text in case it is not perfectly divisible.
|
|
||||||
MPI_Scatterv(
|
|
||||||
text,
|
|
||||||
text_piece,
|
|
||||||
displacements,
|
|
||||||
MPI_CHAR,
|
|
||||||
private_text,
|
|
||||||
private_text_len,
|
|
||||||
MPI_CHAR, MASTER,
|
|
||||||
MPI_COMM_WORLD
|
|
||||||
);
|
|
||||||
private_text[private_text_len] = '\0';
|
|
||||||
}
|
|
||||||
|
|
||||||
void distribute_pattern() {
|
|
||||||
// distribution of the length of the pattern to each process
|
|
||||||
MPI_Bcast(&pattern_len, 1, MPI_INT, MASTER, MPI_COMM_WORLD);
|
|
||||||
|
|
||||||
// allocation of space for the pattern
|
|
||||||
// the master has already done so previously
|
|
||||||
if (rank != MASTER) {
|
|
||||||
pattern = (char *) malloc(sizeof(char) * (pattern_len + 1));
|
|
||||||
}
|
|
||||||
|
|
||||||
// distribution of the pattern to each process
|
|
||||||
MPI_Bcast(pattern, pattern_len + 1, MPI_CHAR, MASTER, MPI_COMM_WORLD);
|
|
||||||
}
|
|
||||||
|
|
||||||
void search_for_splitted_pattern(int *residue, int *match_number, int **matches) {
|
void search_for_splitted_pattern(int *residue, int *match_number, int **matches) {
|
||||||
// send the residue to the next process
|
// send the residue to the next process
|
||||||
if (rank + 1 != size) {
|
if (rank + 1 != size) {
|
||||||
@ -209,13 +134,6 @@ void search_for_splitted_pattern(int *residue, int *match_number, int **matches)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void calculate_absolute_indices(int match_number, int *matches) {
|
void calculate_absolute_indices(int match_number, int *matches) {
|
||||||
// sending the remain to the last process
|
|
||||||
// this is necessary for the calculation of shift amount
|
|
||||||
if (rank == MASTER)
|
|
||||||
MPI_Send(&remain, 1, MPI_INT, size - 1, DEFAULT_TAG, MPI_COMM_WORLD);
|
|
||||||
else if (rank == size - 1)
|
|
||||||
MPI_Recv(&remain, 1, MPI_INT, MASTER, DEFAULT_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
|
|
||||||
|
|
||||||
// transformation of match indices from relative to absolute position.
|
// transformation of match indices from relative to absolute position.
|
||||||
int shift = rank * (private_text_len - remain); // the rest is zero for all but the last process
|
int shift = rank * (private_text_len - remain); // the rest is zero for all but the last process
|
||||||
apply_shift(shift, matches, match_number);
|
apply_shift(shift, matches, match_number);
|
||||||
@ -235,6 +153,7 @@ void collect_results(int *match_number, int *matches) {
|
|||||||
total_match_number = sum_array(match_numbers, size);
|
total_match_number = sum_array(match_numbers, size);
|
||||||
total_matches = (int *) malloc(sizeof(int) * total_match_number);
|
total_matches = (int *) malloc(sizeof(int) * total_match_number);
|
||||||
|
|
||||||
|
displacements = (int *) malloc(sizeof(int) * size);
|
||||||
displacements[0] = 0;
|
displacements[0] = 0;
|
||||||
for (int i = 0; i < size - 1; ++i) {
|
for (int i = 0; i < size - 1; ++i) {
|
||||||
displacements[i + 1] = displacements[i] + match_numbers[i];
|
displacements[i + 1] = displacements[i] + match_numbers[i];
|
||||||
|
@ -54,7 +54,7 @@ char *read_file(char *filepath, int *len) {
|
|||||||
return content;
|
return content;
|
||||||
}
|
}
|
||||||
|
|
||||||
char *read_file_portion(char *filepath, int size, int rank, int *portion_len) {
|
char *read_file_portion(char *filepath, int size, int rank, int *portion_len, int *remain) {
|
||||||
FILE *f = fopen(filepath, "r");
|
FILE *f = fopen(filepath, "r");
|
||||||
fseek(f, 0, SEEK_END);
|
fseek(f, 0, SEEK_END);
|
||||||
int total_len = ftell(f) - 1;
|
int total_len = ftell(f) - 1;
|
||||||
@ -63,8 +63,10 @@ char *read_file_portion(char *filepath, int size, int rank, int *portion_len) {
|
|||||||
|
|
||||||
int start_point = rank * len;
|
int start_point = rank * len;
|
||||||
int end_point = start_point + len;
|
int end_point = start_point + len;
|
||||||
|
|
||||||
if (rank + 1 == size)
|
if (rank + 1 == size)
|
||||||
end_point += total_len % size;
|
*remain = total_len % size;
|
||||||
|
end_point += *remain;
|
||||||
|
|
||||||
*portion_len = end_point - start_point;
|
*portion_len = end_point - start_point;
|
||||||
char *content = (char *) malloc(sizeof(char) * (*portion_len));
|
char *content = (char *) malloc(sizeof(char) * (*portion_len));
|
||||||
@ -75,5 +77,5 @@ char *read_file_portion(char *filepath, int size, int rank, int *portion_len) {
|
|||||||
|
|
||||||
content[*portion_len] = '\0';
|
content[*portion_len] = '\0';
|
||||||
|
|
||||||
return NULL;
|
return content;
|
||||||
}
|
}
|
@ -11,6 +11,6 @@ int sum_array(int *, int);
|
|||||||
|
|
||||||
char *get_text_file(int argc, char **argv);
|
char *get_text_file(int argc, char **argv);
|
||||||
|
|
||||||
char *read_file_portion(char *filepath, int size, int rank, int *portion_len);
|
char *read_file_portion(char *filepath, int size, int rank, int *portion_len, int *remain);
|
||||||
|
|
||||||
#endif //KMP_UTIL_H
|
#endif //KMP_UTIL_H
|
||||||
|
Loading…
Reference in New Issue
Block a user