[similarity] A *new* sequence alignment algorithm that builds on Smith-Waterman-Gotoh with affine gap penalties. Like Smith-Waterman, it performs a local alignment, and like the cost-only version of Gotoh's improvement, it needs O(mn) time and O(m) space (where m is the length of the longer string and n is the length of the other). However, this version of the algorithm stores and returns a breakdown of the number and specific types of edits it makes (matches, mismatches, gap opens, gap extensions, and transpositions) rather than rolling them up into a single cost, and it does so without needing to compute or return the full alignment, as Needleman-Wunsch or Hirschberg's variant would.
This commit is contained in:
@@ -6,13 +6,30 @@
|
||||
|
||||
#include "collections.h"
|
||||
|
||||
#define DEFAULT_JARO_WINKLER_PREFIX_SCALE 0.1
|
||||
#define DEFAULT_JARO_WINKLER_BONUS_THRESHOLD 0.7
|
||||
/*
 * Default costs for affine-gap alignment, one macro per edit type:
 * gap open, gap extension, match, mismatch and transposition.
 * With these values a match is free (0) and a mismatch (6) is the most
 * expensive single edit.
 * NOTE(review): the code that consumes these defaults is not visible in
 * this header -- confirm where they are applied.
 */
#define DEFAULT_AFFINE_GAP_OPEN_COST 3
|
||||
#define DEFAULT_AFFINE_GAP_EXTEND_COST 2
|
||||
#define DEFAULT_AFFINE_GAP_MATCH_COST 0
|
||||
#define DEFAULT_AFFINE_GAP_MISMATCH_COST 6
|
||||
#define DEFAULT_AFFINE_GAP_TRANSPOSE_COST 4
|
||||
|
||||
/*
 * Breakdown of an affine-gap alignment by edit type.
 * This is the value returned by affine_gap_distance() and
 * affine_gap_distance_unicode(): each field is a count of one kind of
 * edit, kept separate instead of being rolled up into a single cost.
 */
typedef struct affine_gap_edits {
|
||||
size_t num_matches;          /* number of matches */
|
||||
size_t num_mismatches;       /* number of mismatches */
|
||||
size_t num_transpositions;   /* number of transpositions */
|
||||
size_t num_gap_opens;        /* number of gap opens */
|
||||
size_t num_gap_extensions;   /* number of gap extensions */
|
||||
} affine_gap_edits_t;
|
||||
|
||||
/*
 * Affine-gap alignment of s1 against s2. Returns the per-edit-type counts
 * by value as an affine_gap_edits_t.
 * NOTE(review): presumably s1 and s2 are NUL-terminated strings -- confirm
 * against the definition.
 * NOTE(review): s1/s2 are plain char * here, while the other string
 * functions in this header take const char *; check whether these can be
 * const-qualified together with the definition.
 */
affine_gap_edits_t affine_gap_distance(char *s1, char *s2);
|
||||
/*
 * Same result type as affine_gap_distance(), but takes the two inputs as
 * uint32_array values.
 * NOTE(review): presumably arrays of Unicode code points -- confirm.
 */
affine_gap_edits_t affine_gap_distance_unicode(uint32_array *u1_array, uint32_array *u2_array);
|
||||
|
||||
/*
 * Damerau-Levenshtein distance between s1 and s2.
 * NOTE(review): the return type is signed (ssize_t); presumably a negative
 * value signals an error -- confirm against the definition.
 */
ssize_t damerau_levenshtein_distance(const char *s1, const char *s2);
|
||||
/*
 * Variant taking the inputs as uint32_array values plus an explicit
 * replace_cost.
 * NOTE(review): replace_cost is presumably the cost charged for a
 * substitution -- confirm.
 */
ssize_t damerau_levenshtein_distance_unicode(uint32_array *u1_array, uint32_array *u2_array, size_t replace_cost);
|
||||
/* String variant that takes an explicit replace_cost. */
ssize_t damerau_levenshtein_distance_replace_cost(const char *s1, const char *s2, size_t replace_cost);
|
||||
|
||||
/*
 * Default Jaro-Winkler parameters. The names correspond to the
 * prefix_scale and bonus_threshold arguments of
 * jaro_winkler_distance_prefix_threshold().
 * NOTE(review): presumably used when a caller does not supply its own
 * values -- confirm against the definitions.
 */
#define DEFAULT_JARO_WINKLER_PREFIX_SCALE 0.1
|
||||
#define DEFAULT_JARO_WINKLER_BONUS_THRESHOLD 0.7
|
||||
|
||||
/*
 * Jaro distance between s1 and s2, returned as a double.
 * NOTE(review): the value range and whether higher means "more similar"
 * are not visible from this header -- confirm against the definition.
 */
double jaro_distance(const char *s1, const char *s2);
|
||||
/* Variant of jaro_distance() that takes the inputs as uint32_array values. */
double jaro_distance_unicode(uint32_array *u1_array, uint32_array *u2_array);
|
||||
/*
 * Jaro-Winkler distance with caller-supplied prefix_scale and
 * bonus_threshold (see DEFAULT_JARO_WINKLER_PREFIX_SCALE and
 * DEFAULT_JARO_WINKLER_BONUS_THRESHOLD for the default values).
 */
double jaro_winkler_distance_prefix_threshold(const char *s1, const char *s2, double prefix_scale, double bonus_threshold);
|
||||
|
||||
Reference in New Issue
Block a user