Hymn Script
Home Learn Play Download Source Light

hymn_pattern.c


/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */

#include <ctype.h>

#include "hymn_pattern.h"

#define MAX_CAPTURES 16

typedef struct Capture Capture;
typedef struct Match Match;

struct Capture {
    const char *begin;
    size_t size;
};

struct Match {
    const char *error;
    int count;
    char padding[4];
    Capture capture[MAX_CAPTURES];
};

static bool class(int type, int c) {
    if (type == '\0') return false;
    int result;
    switch (tolower(type)) {
    case 'a': result = isalpha(c); break;
    case 'c': result = iscntrl(c); break;
    case 'd': result = isdigit(c); break;
    case 'l': result = islower(c); break;
    case 'p': result = ispunct(c); break;
    case 's': result = isspace(c); break;
    case 'u': result = isupper(c); break;
    case 'w': result = isalnum(c); break;
    case 'x': result = isxdigit(c); break;
    case 'z': result = (c == 0); break;
    default: return (c == type);
    }
    return islower(type) ? result : !result;
}

static char *possible(Match *group, char *pattern) {
    char c = pattern[0];
    pattern++;
    switch (c) {
    case '%': {
        if (pattern[0] == '\0') {
            group->error = "malformed end of class";
        }
        return pattern + 1;
    }
    case '[': {
        while (true) {
            c = pattern[0];
            if (c == '\0') {
                group->error = "malformed end of class";
                return pattern;
            } else if (c == ']') {
                return pattern + 1;
            } else if (c == '%') {
                pattern++;
                if (pattern[0] == '\0') {
                    group->error = "malformed end of class";
                    return pattern;
                }
            }
            pattern++;
        }
    }
    default: return pattern;
    }
}

static bool any(char *pattern, char *end, int c) {
    while (true) {
        pattern++;
        if (pattern == end) {
            return false;
        } else if (pattern[0] == '%') {
            pattern++;
            if (class(pattern[0], c)) {
                return true;
            }
        } else if (pattern[0] == c) {
            return true;
        }
    }
}

static bool check(char *pattern, char *end, int c) {
    if (c == '\0') {
        return false;
    }
    switch (pattern[0]) {
    case '.': return true;
    case '%': return class(pattern[1], c);
    case '[': return any(pattern, end - 1, c);
    default: return pattern[0] == c;
    }
}

static char *match(Match *group, char *pattern, char *text);

static char *greedy(Match *group, char *pattern, char *end, char *text) {
    int count = 0;
    while (check(pattern, end, text[count])) {
        count++;
    }
    while (count >= 0) {
        char *stop = match(group, end + 1, text + count);
        if (stop != NULL) {
            return stop;
        }
        count--;
    }
    return NULL;
}

static char *minimal(Match *group, char *pattern, char *end, char *text) {
    while (true) {
        char *stop = match(group, end + 1, text);
        if (stop != NULL) {
            return stop;
        } else if (check(pattern, end, text[0])) {
            text++;
        } else {
            return NULL;
        }
    }
}

static char *capturing(Match *group, char *pattern, char *text) {
    int count = group->count;
    if (count >= MAX_CAPTURES) {
        group->error = "too many capture groups";
        return false;
    }
    group->capture[count].begin = text;
    group->capture[count].size = SIZE_MAX;
    group->count = count + 1;
    char *stop = match(group, pattern, text);
    if (stop == NULL) {
        group->count--;
    }
    return stop;
}

static char *captured(Match *group, char *pattern, char *text) {
    int count = group->count;
    if (count > 0) {
        while (--count >= 0) {
            Capture *capture = &group->capture[count];
            if (capture->size == SIZE_MAX) {
                capture->size = (size_t)(text - capture->begin);
                char *stop = match(group, pattern, text);
                if (stop == NULL) {
                    capture->size = SIZE_MAX;
                }
                return stop;
            }
        }
    }
    group->error = "missing start of capture group";
    return NULL;
}

static char *match(Match *group, char *pattern, char *text) {
init:
    switch (pattern[0]) {
    case '\0':
        return text;
    case '(':
        return capturing(group, pattern + 1, text);
    case ')':
        return captured(group, pattern + 1, text);
    case '$': {
        if (pattern[1] == '\0') {
            return text[0] == '\0' ? text : NULL;
        }
        break;
    }
    default:
        break;
    }
    char *end = possible(group, pattern);
    if (group->error != NULL) {
        return NULL;
    }
    bool matching = check(pattern, end, text[0]);
    switch (end[0]) {
    case '-': return minimal(group, pattern, end, text);
    case '*': return greedy(group, pattern, end, text);
    case '+': return matching ? greedy(group, pattern, end, text + 1) : NULL;
    case '?': {
        if (matching) {
            char *stop = match(group, end + 1, text + 1);
            if (stop != NULL) {
                return stop;
            }
        }
        if (group->error != NULL) {
            return NULL;
        }
        pattern = end + 1;
        goto init;
    }
    default:
        if (matching) {
            pattern = end;
            text++;
            goto init;
        }
        return NULL;
    }
}

static HymnValue get(Hymn *H, int count, HymnValue *arguments) {
    if (count < 2) {
        return hymn_arity_exception(H, 2, count);
    }
    HymnValue source = arguments[0];
    if (!hymn_is_string(source)) {
        return hymn_type_exception(H, HYMN_VALUE_STRING, source.is);
    }
    HymnValue expression = arguments[1];
    if (!hymn_is_string(expression)) {
        return hymn_type_exception(H, HYMN_VALUE_STRING, expression.is);
    }
    HymnString *text = hymn_as_string(source);
    if (count >= 3) {
        HymnValue number = arguments[2];
        if (!hymn_is_int(number)) {
            return hymn_type_exception(H, HYMN_VALUE_INTEGER, number.is);
        }
        HymnInt start = hymn_as_int(number);
        if (start < 0) {
            start = 0;
        } else if (start >= (HymnInt)hymn_string_len(text)) {
            return hymn_new_none();
        }
        text += start;
    }
    HymnString *pattern = hymn_as_string(expression);
    char *begin = NULL;
    char *end = NULL;
    Match group = {0};
    if (pattern[0] == '^') {
        end = match(&group, pattern + 1, text);
        if (end != NULL) {
            begin = text;
        }
    } else {
        while (true) {
            end = match(&group, pattern, text);
            if (end != NULL) {
                begin = text;
                break;
            } else if (group.error != NULL || text[0] == '\0') {
                break;
            }
            text++;
        }
    }
    if (group.error != NULL) {
        return hymn_new_exception(H, group.error);
    } else if (begin == NULL) {
        return hymn_new_none();
    }
    HymnArray *array = hymn_new_array(0);
    HymnString *whole = hymn_string_format("%.*s", end - begin, begin);
    HymnObjectString *have = hymn_intern_string(H, whole);
    hymn_reference_string(have);
    hymn_array_push(array, hymn_new_string_value(have));
    for (int i = 0; i < group.count; i++) {
        HymnString *sub = hymn_string_format("%.*s", group.capture[i].size, group.capture[i].begin);
        HymnObjectString *object = hymn_intern_string(H, sub);
        hymn_reference_string(object);
        hymn_array_push(array, hymn_new_string_value(object));
    }
    return hymn_new_array_value(array);
}

static HymnValue find(Hymn *H, int count, HymnValue *arguments) {
    if (count < 2) {
        return hymn_arity_exception(H, 2, count);
    }
    HymnValue source = arguments[0];
    if (!hymn_is_string(source)) {
        return hymn_type_exception(H, HYMN_VALUE_STRING, source.is);
    }
    HymnValue expression = arguments[1];
    if (!hymn_is_string(expression)) {
        return hymn_type_exception(H, HYMN_VALUE_STRING, expression.is);
    }
    HymnString *original = hymn_as_string(source);
    HymnString *text = original;
    if (count >= 3) {
        HymnValue number = arguments[2];
        if (!hymn_is_int(number)) {
            return hymn_type_exception(H, HYMN_VALUE_INTEGER, number.is);
        }
        HymnInt start = hymn_as_int(number);
        if (start < 0) {
            start = 0;
        } else if (start >= (HymnInt)hymn_string_len(text)) {
            return hymn_new_none();
        }
        text += start;
    }
    HymnString *pattern = hymn_as_string(expression);
    char *begin = NULL;
    char *end = NULL;
    Match group = {0};
    if (pattern[0] == '^') {
        end = match(&group, pattern + 1, text);
        if (end != NULL) {
            begin = text;
        }
    } else {
        while (true) {
            end = match(&group, pattern, text);
            if (end != NULL) {
                begin = text;
                break;
            } else if (group.error != NULL || text[0] == '\0') {
                break;
            }
            text++;
        }
    }
    if (group.error != NULL) {
        return hymn_new_exception(H, group.error);
    } else if (begin == NULL) {
        return hymn_new_none();
    }
    HymnArray *array = hymn_new_array(0);
    hymn_array_push(array, hymn_new_int((HymnInt)(begin - original)));
    hymn_array_push(array, hymn_new_int((HymnInt)(end - original)));
    for (int i = 0; i < group.count; i++) {
        HymnInt start = (HymnInt)(group.capture[i].begin - original);
        hymn_array_push(array, hymn_new_int(start));
        hymn_array_push(array, hymn_new_int(start + (HymnInt)group.capture[i].size));
    }
    return hymn_new_array_value(array);
}

static HymnValue is_match(Hymn *H, int count, HymnValue *arguments) {
    if (count < 2) {
        return hymn_arity_exception(H, 2, count);
    }
    HymnValue source = arguments[0];
    if (!hymn_is_string(source)) {
        return hymn_type_exception(H, HYMN_VALUE_STRING, source.is);
    }
    HymnValue expression = arguments[1];
    if (!hymn_is_string(expression)) {
        return hymn_type_exception(H, HYMN_VALUE_STRING, expression.is);
    }
    HymnString *original = hymn_as_string(source);
    if (count >= 3) {
        HymnValue number = arguments[2];
        if (!hymn_is_int(number)) {
            return hymn_type_exception(H, HYMN_VALUE_INTEGER, number.is);
        }
        HymnInt start = hymn_as_int(number);
        if (start < 0) {
            start = 0;
        } else if (start >= (HymnInt)hymn_string_len(original)) {
            return hymn_new_bool(false);
        }
        original += start;
    }
    HymnString *text = original;
    HymnString *pattern = hymn_as_string(expression);
    char *begin = NULL;
    char *end = NULL;
    Match group = {0};
    if (pattern[0] == '^') {
        end = match(&group, pattern + 1, text);
        if (end != NULL) {
            begin = text;
        }
    } else {
        while (true) {
            end = match(&group, pattern, text);
            if (end != NULL) {
                begin = text;
                break;
            } else if (group.error != NULL || text[0] == '\0') {
                break;
            }
            text++;
        }
    }
    if (group.error != NULL) {
        return hymn_new_exception(H, group.error);
    }
    return hymn_new_bool(begin != NULL && begin == original && end[0] == '\0');
}

static HymnString *replacer(HymnString *updated, char *begin, char *end, Match *group, HymnString *replacement) {
    if (updated == NULL) {
        updated = hymn_new_string_with_capacity(hymn_string_len(replacement));
    }
    HymnString *text = replacement;
    while (text[0] != '\0') {
        if (text[0] == '%') {
            char c = text[1];
            if (c == '%') {
                updated = hymn_string_append_char(updated, '%');
                text += 2;
                continue;
            } else if (c >= '0' && c <= '9') {
                int i = c - '0';
                if (i == 0) {
                    updated = hymn_string_append_substring(updated, begin, 0, (size_t)(end - begin));
                    text += 2;
                    continue;
                } else {
                    i--;
                    if (i < group->count) {
                        updated = hymn_string_append_substring(updated, group->capture[i].begin, 0, group->capture[i].size);
                        text += 2;
                        continue;
                    }
                }
            }
        }
        updated = hymn_string_append_char(updated, text[0]);
        text++;
    }
    return updated;
}

static HymnValue replace(Hymn *H, int count, HymnValue *arguments) {
    if (count < 3) {
        return hymn_arity_exception(H, 3, count);
    }
    HymnValue source = arguments[0];
    if (!hymn_is_string(source)) {
        return hymn_type_exception(H, HYMN_VALUE_STRING, source.is);
    }
    HymnValue expression = arguments[1];
    if (!hymn_is_string(expression)) {
        return hymn_type_exception(H, HYMN_VALUE_STRING, expression.is);
    }
    HymnValue substitute = arguments[2];
    if (!hymn_is_string(substitute)) {
        return hymn_type_exception(H, HYMN_VALUE_STRING, substitute.is);
    }
    HymnString *original = hymn_as_string(source);
    HymnString *text = original;
    HymnString *pattern = hymn_as_string(expression);
    HymnString *replacement = hymn_as_string(substitute);
    HymnString *updated = NULL;
    char *last = NULL;
    Match group = {0};
    if (pattern[0] == '^') {
        char *end = match(&group, pattern + 1, text);
        if (group.error == NULL && end != NULL) {
            updated = hymn_string_copy(replacement);
            last = end;
        }
    } else {
        while (true) {
            char *end = match(&group, pattern, text);
            if (group.error != NULL) {
                break;
            } else if (end != NULL) {
                if (updated == NULL) {
                    if (text == original) {
                        updated = replacer(updated, text, end, &group, replacement);
                    } else {
                        updated = hymn_string_format("%.*s", text - original, original);
                        updated = replacer(updated, text, end, &group, replacement);
                    }
                } else {
                    if (last != text) {
                        HymnString *behind = hymn_string_format("%.*s", text - last, last);
                        updated = hymn_string_append(updated, behind);
                        hymn_string_delete(behind);
                    }
                    updated = replacer(updated, text, end, &group, replacement);
                }
                last = end;
                if (end[0] == '\0') {
                    break;
                }
                text = end;
            } else {
                text++;
                if (text[0] == '\0') {
                    break;
                }
            }
        }
    }
    if (group.error != NULL) {
        if (updated != NULL) {
            hymn_string_delete(updated);
        }
        return hymn_new_exception(H, group.error);
    }
    if (updated == NULL) {
        return source;
    } else if (last != NULL) {
        updated = hymn_string_append(updated, last);
    }
    return hymn_new_string_value(hymn_intern_string(H, updated));
}

void hymn_use_pattern(Hymn *H) {
    HymnTable *pattern = hymn_new_table();
    hymn_add_function_to_table(H, pattern, "get", get);
    hymn_add_function_to_table(H, pattern, "find", find);
    hymn_add_function_to_table(H, pattern, "match", is_match);
    hymn_add_function_to_table(H, pattern, "replace", replace);
    hymn_add_table(H, "pattern", pattern);
}