initial commit based on existing implementation of another project
Some checks failed
Zig Project Action / Lint, Spell-check and test zig project (push) Failing after 1m37s

Added documentation comments with example snippets and a complete
example showcasing how to use the library for matching.
This commit is contained in:
2025-11-25 19:32:30 +01:00
parent eb7cc9c2dc
commit 04f082d801
5 changed files with 325 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
.zig-cache/
zig-out/

9
LICENSE Normal file
View File

@@ -0,0 +1,9 @@
MIT License
Copyright (c) 2025 Yves Biener
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

20
build.zig Normal file
View File

@@ -0,0 +1,20 @@
const std = @import("std");
/// Build script entry point. Registers the `fuzzig` module (rooted at
/// `src/root.zig`) using the standard target and optimization options, and
/// adds a `test` step that runs the tests of that module.
pub fn build(b: *std.Build) void {
    const fuzzig_module = b.addModule("fuzzig", .{
        .root_source_file = b.path("src/root.zig"),
        .target = b.standardTargetOptions(.{}),
        .optimize = b.standardOptimizeOption(.{}),
    });

    // compile the module's tests and run them as part of the `test` step
    const unit_tests = b.addTest(.{ .root_module = fuzzig_module });
    const run_unit_tests = b.addRunArtifact(unit_tests);
    b.step("test", "Run tests").dependOn(&run_unit_tests.step);
}

14
build.zig.zon Normal file
View File

@@ -0,0 +1,14 @@
// Package manifest for the `fuzzig` library.
.{
// Package name; `build.zig` registers its module under the same name ("fuzzig").
.name = .fuzzig,
// This is a [Semantic Version](https://semver.org/).
.version = "0.0.0",
.fingerprint = 0x6450ab302d40f9a8, // Changing this has security and trust implications.
// Oldest Zig version this package declares support for (a 0.16.0 development build).
.minimum_zig_version = "0.16.0-dev.1254+bf15c791f",
// The library has no dependencies on other packages.
.dependencies = .{},
// Files and directories that are listed as belonging to the package.
.paths = .{
"build.zig",
"build.zig.zon",
"src",
"LICENSE",
},
}

280
src/root.zig Normal file
View File

@@ -0,0 +1,280 @@
//! `Fuzzig` is a fuzzy search library. The matching algorithm is based on the MIT-licensed
//! implementation of [ms-edit](https://github.com/microsoft/edit/blob/main/src/fuzzy.rs).
/// Outcome of scoring one target string against a query. A `Result` owns the
/// memory backing `positions`; call `deinit` once the `Result` is no longer
/// required.
///
/// # Example
///
/// ```zig
/// const item = haystack[result.index];
/// const match_highlights = try gpa.alloc(u8, item.len);
/// defer gpa.free(match_highlights);
/// @memset(match_highlights, ' ');
/// // mark every character that contributed to this search result
/// for (result.positions.items) |pos| match_highlights[pos] = '^';
/// ```
///
/// With `haystack[result.index]` = "Hello, World!" and the query "world", the
/// item and its highlights line up as follows:
///
/// ```
/// Hello, World!
///        ^^^^^
/// ```
pub const Result = struct {
    /// Score calculated for the target; `greaterThan` orders higher scores first.
    score: usize,
    /// Indices of the target characters that were matched during the fuzzy
    /// scoring, stored in reversed order.
    positions: std.ArrayList(usize),
    /// Index of the entry this `Result` relates to. Using this index the
    /// associated entry (e.g. a file) can be determined by the caller.
    index: usize,

    const Self = @This();

    /// Bundle the given values into a `Result`. The `Result` takes over the
    /// provided `positions` list as is (no copy is made).
    pub fn init(score: usize, positions: std.ArrayList(usize), index: usize) Self {
        return Self{
            .index = index,
            .positions = positions,
            .score = score,
        };
    }

    /// Release the memory held by `positions` using `gpa`.
    pub fn deinit(this: *Self, gpa: Allocator) void {
        this.positions.deinit(gpa);
    }
};
/// Ordering predicate for the sort functions of `std.sort`. Reports whether
/// `a` has a strictly higher score than `b`, so sorting with it places the
/// results with the highest score first.
///
/// # Example
///
/// The following snippet shows how `greaterThan` is passed to the heap sort
/// algorithm of the standard library.
///
/// ```zig
/// var results: std.ArrayList(Result) = .empty;
/// // ..
/// std.sort.heap(fuzzig.Result, results.items, {}, fuzzig.greaterThan);
/// // act on sorted scores:
/// for (results.items) |result| {}
/// ```
pub fn greaterThan(_: void, a: Result, b: Result) bool {
    return b.score < a.score;
}
/// Calculate the matching score for the provided `query` against the `target`
/// string. The `index` is used as a reference for the target string and is
/// passed through unchanged to the returned `Result`.
///
/// Matching works on bytes and ignores ASCII case (a small bonus is given for
/// an exact case match).
///
/// Returns `null` if no scoring is possible, i.e. `target` or `query` is empty
/// or the query is longer than the target. Otherwise a `Result` is returned,
/// whose `score` is 0 (with no `positions`) when the query did not match.
///
/// The `positions` of the returned `Result` are allocated with `gpa` and are
/// owned by the caller (see `Result.deinit`). All other memory used during the
/// calculation is released before returning. Fails with `error.OutOfMemory` if
/// the required memory cannot be allocated.
///
/// # Example
///
/// Given a list of file names, you can match a given string as follows:
///
/// ```zig
/// var results: std.ArrayList(Result) = .empty;
/// defer {
///     for (results.items) |*result| result.deinit(gpa);
///     results.deinit(gpa);
/// }
/// // ..
/// // create fuzzy score for each file entry
/// for (0.., files.items) |idx, entry| {
///     const result = try fuzzig.match(gpa, entry, search, idx) orelse continue;
///     try results.append(gpa, result);
/// }
/// ```
pub fn match(gpa: Allocator, target: []const u8, query: []const u8, index: usize) !?Result {
    if (target.len == 0 or query.len == 0) return null;
    if (target.len < query.len) return null;

    // The tables below hold one cell per (query byte, target byte) pair. A
    // table whose size is not representable could never be allocated either,
    // hence an overflow is reported as running out of memory.
    const area = std.math.mul(usize, target.len, query.len) catch return error.OutOfMemory;

    // temporary arena allocator to free all scratch memory at the end of the function
    var scratch_arena: heap.ArenaAllocator = .init(gpa);
    defer scratch_arena.deinit();
    const allocator = scratch_arena.allocator();

    const target_lower = try fold_case(allocator, target);
    const query_lower = try fold_case(allocator, query);

    // Row-major tables with one row per query byte: `scores` holds the best
    // score so far, `matches` the length of the matched sequence ending in
    // that cell (0 if the cell is not a match).
    const scores = try allocator.alloc(usize, area);
    const matches = try allocator.alloc(usize, area);

    for (0..query.len) |idx| {
        const offset = idx * target.len;
        const prev_offset = if (idx > 0) (idx - 1) * target.len else 0;
        for (0..target.len) |target_index| {
            const current_idx = offset + target_index;
            const diag_idx = if (idx > 0 and target_index > 0) prev_offset + target_index - 1 else 0;
            const left_score = if (target_index > 0) scores[current_idx - 1] else 0;
            const diag_score = if (idx > 0 and target_index > 0) scores[diag_idx] else 0;
            const matches_sequence_len = if (idx > 0 and target_index > 0) matches[diag_idx] else 0;
            // Every query byte after the first one can only score if the
            // previous query byte was matched earlier in the target.
            const score = if (diag_score == 0 and idx != 0) 0 else compute_char_score(
                query[idx],
                query_lower[idx],
                if (target_index != 0) target[target_index - 1] else null,
                target[target_index],
                target_lower[target_index],
                matches_sequence_len,
            );
            if (score != 0 and diag_score + score >= left_score) {
                matches[current_idx] = matches_sequence_len + 1;
                scores[current_idx] = diag_score + score;
            } else {
                matches[current_idx] = 0;
                scores[current_idx] = left_score;
            }
        }
    }

    var positions: std.ArrayList(usize) = .empty;
    // do not leak the already collected positions if a later append fails
    errdefer positions.deinit(gpa);

    // Walk back from the last cell to collect the matched target indices. Both
    // lengths are non-zero here (see the early returns above), so the
    // subtractions cannot underflow. The positions end up in reversed order.
    var query_idx = query.len - 1;
    var target_idx = target.len - 1;
    while (true) {
        const current_idx = query_idx * target.len + target_idx;
        if (matches[current_idx] == 0) {
            if (target_idx == 0) break;
            target_idx -= 1;
        } else {
            try positions.append(gpa, target_idx);
            if (query_idx == 0 or target_idx == 0) break;
            query_idx -= 1;
            target_idx -= 1;
        }
    }
    return .init(scores[area - 1], positions, index);
}
/// Compute the score for a single query character against a single target
/// character, taking into account the previous target character and the
/// length of the (sub-)sequence that has already been matched.
///
/// Returns 0 if the characters do not match. Two characters match if their
/// case-folded forms are equal or if both are path separators (`/` and `\`
/// are treated as interchangeable). A match scores:
///
/// - 1 for the match itself,
/// - 5 for every character of the already matched sequence,
/// - 1 if the characters are also equal without case folding,
/// - the separator bonus of `target_prev` (see `score_separator_at_pos`),
/// - 2 if the target character is upper case and does not continue a sequence,
/// - 8 if there is no previous character, i.e. the match is at the start of the target.
fn compute_char_score(query: u8, query_lower: u8, target_prev: ?u8, target_curr: u8, target_curr_lower: u8, matches_sequence_len: usize) usize {
    // `and` binds tighter than `or`, hence the separator checks are evaluated
    // on their own and only then combined.
    const query_is_separator = query_lower == '/' or query_lower == '\\';
    const target_is_separator = target_curr_lower == '/' or target_curr_lower == '\\';
    const is_match = query_lower == target_curr_lower or (query_is_separator and target_is_separator);
    if (!is_match) return 0;

    // character match bonus
    var score: usize = 1;
    // consecutive match bonus
    score += matches_sequence_len * 5;
    // same case bonus
    if (query == target_curr) score += 1;
    if (target_prev) |prev| {
        // after separator bonus
        score += score_separator_at_pos(prev);
        // upper case bonus (e.g. camel case), only outside of an already matching sequence
        // NOTE(review): this bonus is granted in addition to the separator bonus - confirm that this is intended
        if (target_curr != target_curr_lower and matches_sequence_len == 0) score += 2;
    } else {
        // start of target bonus
        score += 8;
    }
    return score;
}
/// Bonus for a match that directly follows the separator character `prev`.
/// Path separators are slightly preferred (5) over other separators (4); any
/// other character yields no bonus (0).
fn score_separator_at_pos(prev: u8) u32 {
    // prefer path separators...
    if (prev == '/' or prev == '\\') return 5;
    // ...over other separators
    for ("_-. '\":") |separator| {
        if (prev == separator) return 4;
    }
    return 0;
}
/// Fold the case of the provided string: every ASCII upper case letter is
/// replaced by its lower case counterpart, all other bytes are copied as is.
/// The returned slice has the same length as `s`, is owned by the caller and
/// has to be freed using the provided `Allocator`.
fn fold_case(gpa: Allocator, s: []const u8) ![]const u8 {
    // a single allocation of the final size; nothing can leak on failure
    const folded = try gpa.alloc(u8, s.len);
    for (folded, s) |*dst, c| dst.* = std.ascii.toLower(c);
    return folded;
}
const std = @import("std");
const heap = std.heap;
const testing = std.testing;
const Allocator = std.mem.Allocator;
// Complete example: fuzzy match the query "s" against the paths of all files
// found below the current working directory, sort the results by score and
// build the highlight line for every matched path.
test "matching `s` on local files" {
    const gpa = testing.allocator;

    // files to fuzzy match against
    var files: std.ArrayList([]const u8) = .empty;
    defer {
        for (files.items) |file| gpa.free(file);
        files.deinit(gpa);
    }
    // fuzzy matching results (every `Result` owns its matched positions)
    var results: std.ArrayList(Result) = .empty;
    defer {
        for (results.items) |*result| result.deinit(gpa);
        results.deinit(gpa);
    }

    // arrange
    var dir = try std.fs.cwd().openDir(".", .{ .iterate = true });
    defer dir.close();
    var iter = try dir.walk(gpa);
    defer iter.deinit();
    while (try iter.next()) |entry| {
        if (entry.kind != .file) continue;
        if (std.mem.startsWith(u8, entry.path, ".git/")) continue;
        if (std.mem.startsWith(u8, entry.path, ".zig-cache")) continue;
        // the copy of the path is owned by `files` once it has been appended
        const path = try gpa.dupe(u8, entry.path);
        errdefer gpa.free(path);
        try files.append(gpa, path);
    }
    try results.ensureTotalCapacity(gpa, files.items.len);

    // act
    const search = "s";
    // create fuzzy score for each file entry
    for (0.., files.items) |idx, entry| {
        const result = try match(gpa, entry, search, idx) orelse continue;
        // capacity for one result per file was reserved above, hence appending cannot fail
        results.appendAssumeCapacity(result);
    }
    // sort results by their received score descending
    std.sort.heap(Result, results.items, {}, greaterThan);

    // keep *stderr* locked for as long as the writer may be used
    std.debug.lockStdErr();
    defer std.debug.unlockStdErr();
    var buf: [128]u8 = undefined;
    var buffer = std.fs.File.stderr().writer(&buf);
    const writer = &buffer.interface;

    // assert
    var scored_entries: usize = 0;
    var unscored_entries: usize = 0;
    for (results.items) |result| {
        if (result.score == 0) {
            unscored_entries += 1;
            continue; // do not print results that are unmatched
        }
        scored_entries += 1;
        const item = files.items[result.index];
        const match_highlights = try gpa.alloc(u8, item.len);
        defer gpa.free(match_highlights);
        @memset(match_highlights, ' ');
        // highlight what caused this search result
        for (result.positions.items) |pos| match_highlights[pos] = '^';
        // print item and its highlighted positions
        // NOTE uncomment the print for the writer to show matches and their highlights of what matched
        // -> as the writer prints to *stderr* writing will cause the test to fail, hence it is commented out by default
        // try writer.print("{s}\n{s}\n", .{ item, match_highlights });
    }
    // a failing flush is reported as a test failure
    try writer.flush();

    // NOTE(review): the expected count depends on the files present below the
    // current working directory when the test runs - confirm that 5 is still correct
    try testing.expectEqual(5, scored_entries);
    try testing.expectEqual(results.items.len - 5, unscored_entries);
}