From 24535aff47cbbee87b1dc4e4228504303df0b2dc Mon Sep 17 00:00:00 2001 From: norangebit Date: Mon, 15 Mar 2021 21:04:23 +0100 Subject: [PATCH] Add sequential implementation --- .idea/.gitignore | 8 ++++++ .idea/kmp.iml | 2 ++ .idea/misc.xml | 4 +++ .idea/modules.xml | 8 ++++++ .idea/vcs.xml | 6 +++++ CMakeLists.txt | 6 +++++ kmp.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++ kmp.h | 8 ++++++ main.c | 25 +++++++++++++++++ util.c | 12 +++++++++ util.h | 6 +++++ 11 files changed, 153 insertions(+) create mode 100644 .idea/.gitignore create mode 100644 .idea/kmp.iml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml create mode 100644 CMakeLists.txt create mode 100644 kmp.c create mode 100644 kmp.h create mode 100644 main.c create mode 100644 util.c create mode 100644 util.h diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..73f69e0 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml +# Editor-based HTTP Client requests +/httpRequests/ diff --git a/.idea/kmp.iml b/.idea/kmp.iml new file mode 100644 index 0000000..f08604b --- /dev/null +++ b/.idea/kmp.iml @@ -0,0 +1,2 @@ + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..79b3c94 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..c033581 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..51c9de4 --- /dev/null +++ 
/*
 * Patch context carried over from the collapsed diff line
 * (tail of the CMakeLists.txt hunk and the kmp.c file header):
 *
 *   b/CMakeLists.txt
 *   @@ -0,0 +1,6 @@
 *   +cmake_minimum_required(VERSION 3.17)
 *   +project(kmp C)
 *   +
 *   +set(CMAKE_C_STANDARD 99)
 *   +
 *   +add_executable(kmp main.c kmp.c kmp.h util.h util.c)
 *   diff --git a/kmp.c b/kmp.c
 *   new file mode 100644
 *   index 0000000..a88db6b
 *   --- /dev/null
 *   +++ b/kmp.c
 *   @@ -0,0 +1,68 @@
 */
#include <stdlib.h>
#include <string.h>

/* Initial capacity (in elements) of the match array; it doubles when full. */
#define CHUNK 5

/**
 * Build the KMP failure table for `pattern`.
 *
 * For every index i, lps[i] receives the length of the longest proper
 * prefix of pattern[0..i] that is also a suffix of pattern[0..i].
 *
 * @param pattern      pattern characters (only the first pattern_len are read)
 * @param pattern_len  number of characters in pattern
 * @param lps          caller-provided array with room for pattern_len ints
 * @return             the `lps` pointer that was passed in
 *
 * Nothing is written when pattern_len <= 0 or when pattern/lps is NULL,
 * so a zero-sized table is never touched.
 */
int *create_lps(char *pattern, int pattern_len, int *lps) {
    if (pattern == NULL || lps == NULL || pattern_len <= 0) {
        return lps;
    }

    int border_len = 0; /* length of the border currently being extended */
    int pos = 1;        /* index of the table entry being computed */

    lps[0] = 0;

    while (pos < pattern_len) {
        if (pattern[pos] == pattern[border_len]) {
            border_len++;
            lps[pos] = border_len;
            pos++;
        } else if (border_len != 0) {
            /* fall back to the next shorter border and retry the same pos */
            border_len = lps[border_len - 1];
        } else {
            /* no border at all ends at pos */
            lps[pos] = 0;
            pos++;
        }
    }

    return lps;
}

/**
 * Find every (possibly overlapping) occurrence of `pattern` in `text`.
 *
 * @param text          NUL-terminated text to scan
 * @param pattern       NUL-terminated pattern to look for
 * @param lps           failure table for `pattern`, as filled by create_lps()
 * @param match_number  in/out match counter; it is used as the write index
 *                      into the freshly allocated result, so the caller must
 *                      set it to 0 before the call
 * @param residue       out: number of pattern characters still matched when
 *                      the end of the text was reached
 * @return              heap-allocated array holding the start offset of each
 *                      match; the caller owns it and must free() it.
 *                      NULL is returned if memory could not be allocated, in
 *                      which case *residue is 0 (and *match_number is reset
 *                      to 0 if the failure happened while growing the array).
 */
int *search_pattern(char *text, char *pattern, int *lps, int *match_number, int *residue) {
    int capacity = CHUNK;
    int *matches = malloc((size_t) capacity * sizeof *matches);
    if (matches == NULL) {
        *residue = 0;
        return NULL;
    }

    const int text_len = (int) strlen(text);
    const int pattern_len = (int) strlen(pattern);
    int text_index = 0;
    int pattern_index = 0;

    while (text_index < text_len) {
        if (text[text_index] == pattern[pattern_index]) {
            /* characters agree: advance both cursors */
            text_index++;
            pattern_index++;

            if (pattern_index == pattern_len) {
                /* full match: make room, record it, then keep scanning */
                if (*match_number == capacity) {
                    /* keep the old pointer until realloc is known to succeed */
                    int *grown = realloc(matches, (size_t) capacity * 2 * sizeof *grown);
                    if (grown == NULL) {
                        free(matches);
                        *match_number = 0;
                        *residue = 0;
                        return NULL;
                    }
                    matches = grown;
                    capacity *= 2;
                }

                matches[*match_number] = text_index - pattern_len;
                *match_number += 1;
                /* reuse the longest border so overlapping matches are found */
                pattern_index = lps[pattern_index - 1];
            }
        } else if (pattern_index == 0) {
            /* mismatch on the first pattern character: skip this text char */
            text_index++;
        } else {
            /* mismatch after a partial match: fall back without moving text */
            pattern_index = lps[pattern_index - 1];
        }
    }

    *residue = pattern_index;
    return matches;
}

/*
 * Patch context carried over from the collapsed diff line
 * (start of the kmp.h file header; the hunk range continues on the next line):
 *
 *   diff --git a/kmp.h b/kmp.h
 *   new file mode 100644
 *   index 0000000..e0e1926
 *   --- /dev/null
 *   +++ b/kmp.h
 *   @@ -0,0
 */
+1,8 @@ +#ifndef KMP_KMP_H +#define KMP_KMP_H + +int *create_lps(char *, int, int *); + +int *search_pattern(char *, char *, int *, int *, int *); + +#endif //KMP_KMP_H diff --git a/main.c b/main.c new file mode 100644 index 0000000..398942f --- /dev/null +++ b/main.c @@ -0,0 +1,25 @@ +#include +#include +#include "kmp.h" +#include "util.h" + + +int main() { + char *patter = "acabacacd"; + char *text = "acfacabacabacacdac"; + + int pattern_len = strlen(patter); + + // long proper suffix array + int lps[pattern_len]; + + create_lps(patter, pattern_len, lps); + + int match_number = 0; + int residue; + + int *matches = search_pattern(text, patter, lps, &match_number, &residue); + + print_array(matches, match_number); + printf("%d\n", residue); +} diff --git a/util.c b/util.c new file mode 100644 index 0000000..b786b15 --- /dev/null +++ b/util.c @@ -0,0 +1,12 @@ +#include + +void print_array(int *array, int length) { + if (length == 0) return; + + printf("|"); + + for (int i = 0; i < length; ++i) { + printf("%d|", array[i]); + } + printf("\n"); +} diff --git a/util.h b/util.h new file mode 100644 index 0000000..036fc44 --- /dev/null +++ b/util.h @@ -0,0 +1,6 @@ +#ifndef KMP_UTIL_H +#define KMP_UTIL_H + +void print_array(int *, int); + +#endif //KMP_UTIL_H