add: spellcheck utility implementation using ispell

2023-10-08 12:17:08 +02:00
parent e6a3330a61
commit e8c8fe34db
5 changed files with 211 additions and 1 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,2 @@
+zig-cache
+zig-out
--- a/README.md
+++ b/README.md
@@ -1,3 +1,19 @@
 # sp

-Spellchecking utility to provide automatic corrected spelling for the provided input using gnu/linux spell.
+Spellchecking utility to provide automatic corrected spelling for the provided input using GNU/Linux ispell.
+
+## Installation
+
+Build the tool using the following command:
+
+```bash
+zig build -Doptimize=ReleaseFast
+```
+
+This was built using Zig 0.12.0.dev but should work with the most recent version of Zig.
+
+Add the directory containing the built `sp` executable to your `PATH` environment variable to easily invoke the tool.
+
+## Usage
+
+`sp <word>` to check the provided _word_. Returns the correct spelling of the word. In case of a misspelling, the first suggestion of ispell is automatically chosen and returned.
--- a/build.zig
+++ b/build.zig
@@ -0,0 +1,70 @@
+const std = @import("std");
+
+// Although this function looks imperative, note that its job is to
+// declaratively construct a build graph that will be executed by an external
+// runner.
+/// Declares the build graph for `sp`.
+///
+/// Adds the executable with its install step, a `run` step that launches
+/// the installed binary and a `test` step that runs the unit tests.
+pub fn build(b: *std.Build) void {
+    // The person invoking `zig build` picks the compilation target. No
+    // defaults are overridden here, so every target is accepted and the
+    // native one is used when none is requested.
+    const chosen_target = b.standardTargetOptions(.{});
+
+    // Likewise the optimization mode (Debug, ReleaseSafe, ReleaseFast or
+    // ReleaseSmall) is left to the caller; no preferred mode is set.
+    const chosen_mode = b.standardOptimizeOption(.{});
+
+    // The `sp` executable. Its root source is a plain path here; a more
+    // elaborate build script could point at a generated file instead.
+    const sp_exe = b.addExecutable(.{
+        .name = "sp",
+        .root_source_file = .{ .path = "src/main.zig" },
+        .target = chosen_target,
+        .optimize = chosen_mode,
+    });
+
+    // Register the executable with the "install" step, which is the step
+    // that runs by default for a bare `zig build`, so it ends up in the
+    // standard install location.
+    b.installArtifact(sp_exe);
+
+    // Add a Run step for the executable to the build graph. On its own it
+    // never executes; it only runs once another step depends on it, which
+    // the "run" step registered below does.
+    const run_sp = b.addRunArtifact(sp_exe);
+
+    // Run from the installation directory instead of the cache directory
+    // by depending on the install step. Not strictly required, but it
+    // guarantees that any other installed files are present and in their
+    // expected location when the application starts.
+    run_sp.step.dependOn(b.getInstallStep());
+
+    // Forward arguments given on the build command line to the
+    // application: `zig build run -- arg1 arg2 etc`
+    if (b.args) |forwarded_args| run_sp.addArgs(forwarded_args);
+
+    // Expose the Run step as `zig build run`. The step is listed in
+    // `zig build --help` and is evaluated in place of the default
+    // "install" step when selected.
+    b.step("run", "Run the app").dependOn(&run_sp.step);
+
+    // Compile the unit tests rooted at src/main.zig with the same target
+    // and mode. This only produces the test executable; running it is a
+    // separate step.
+    const sp_tests = b.addTest(.{
+        .root_source_file = .{ .path = "src/main.zig" },
+        .target = chosen_target,
+        .optimize = chosen_mode,
+    });
+
+    // Run step that executes the test executable built above; like the
+    // application's Run step it only runs when something depends on it.
+    const run_sp_tests = b.addRunArtifact(sp_tests);
+
+    // Expose the tests as `zig build test`, listed in `zig build --help`
+    // just like the "run" step, so the user can request a test run.
+    b.step("test", "Run unit tests").dependOn(&run_sp_tests.step);
+}
--- a/1
+++ b/1
@@ -0,0 +1 @@
+zig-out/bin/sp
--- a/src/main.zig
+++ b/src/main.zig
@@ -0,0 +1,121 @@
+const std = @import("std");
+const heap = std.heap;
+const os = std.os;
+
+/// Checks the spelling of a single word with `ispell -a`.
+///
+/// The word is the first line of stdin when stdin is a pipe, otherwise the
+/// first command-line argument. Prints ispell's first near-miss suggestion
+/// when there is one, else the word unchanged.
+/// Returns `error.MissingWord` when no non-blank word was supplied.
+pub fn main() !void {
+    var gpa = heap.GeneralPurposeAllocator(.{}){};
+    defer _ = gpa.deinit();
+    // every allocation below comes from the arena and is released in one go
+    var arena = heap.ArenaAllocator.init(gpa.allocator());
+    defer arena.deinit();
+
+    const allocator = arena.allocator();
+    const stdin = std.io.getStdIn().reader();
+    const stat = try std.io.getStdIn().stat();
+    // stays null when neither stdin nor the arguments provided anything
+    var input_word: ?[]const u8 = null;
+
+    switch (stat.kind) {
+        .named_pipe => {
+            if (try stdin.readUntilDelimiterOrEofAlloc(allocator, '\n', 1024)) |input| {
+                input_word = input;
+            }
+        },
+        else => {
+            var arg_iterator = try std.process.argsWithAllocator(allocator);
+            // skip own executable name
+            _ = arg_iterator.next();
+            if (arg_iterator.next()) |input| {
+                input_word = input;
+            }
+        },
+    }
+    // fail cleanly instead of handing an undefined slice to ispell
+    const word = std.mem.trim(u8, input_word orelse return error.MissingWord, " \t\r\n");
+    if (word.len == 0) return error.MissingWord;
+
+    // start ispell process
+    var process = std.ChildProcess.init(&.{ "ispell", "-a" }, allocator);
+    process.stdin_behavior = .Pipe;
+    process.stdout_behavior = .Pipe;
+    process.stderr_behavior = .Pipe;
+
+    var stdout = std.ArrayList(u8).init(allocator);
+    var stderr = std.ArrayList(u8).init(allocator);
+    defer {
+        stdout.deinit();
+        stderr.deinit();
+    }
+
+    try process.spawn();
+    // stdin behavior is .Pipe, hence we 'pipe' input into the process after spawning it.
+    // The leading '^' makes ispell check the line as text even when the word starts
+    // with one of its command characters ('*', '@', '#', ...); '\n' ends the line.
+    try process.stdin.?.writer().print("^{s}\n", .{word});
+    // telling the process that the input is complete
+    process.stdin.?.close();
+    // the handle is closed now -> assign null so wait() does not close it again
+    process.stdin = null;
+    // collecting the resulting output; stderr is collected but not used
+    try process.collectOutput(&stdout, &stderr, max_ispell_output);
+    _ = try process.wait();
+
+    // prepare for writing output to stdout
+    const stdout_file = std.io.getStdOut().writer();
+    var bw = std.io.bufferedWriter(stdout_file);
+    const stdout_writer = bw.writer();
+
+    // fall back to the input when ispell accepted it or had nothing to offer
+    try stdout_writer.print("{s}", .{firstSuggestion(stdout.items) orelse word});
+    try bw.flush(); // don't forget to flush!
+}
+
+/// Upper bound for the bytes collected from each ispell output stream.
+const max_ispell_output: usize = 64 * 1024;
+
+/// Returns the first suggestion from the output of `ispell -a` for one checked
+/// word, as a slice of `output`.
+///
+/// Only a `&` result line (near misses) yields a suggestion. Null is returned
+/// when the word was accepted (`*`, `+`, `-`), when ispell had no near miss
+/// (`#`, or `?` which carries guesses only), and for truncated output.
+fn firstSuggestion(output: []const u8) ?[]const u8 {
+    var lines = std.mem.splitScalar(u8, output, '\n');
+    // the first line is only information about ispell
+    _ = lines.next();
+    const result = std.mem.trimRight(u8, lines.next() orelse return null, "\r");
+    // expected shape: "& <word> <count> <offset>: <miss>, <miss>, ..."
+    if (result.len == 0 or result[0] != '&') return null;
+    // Locate the list by its ": " separator rather than by fixed offsets, so
+    // multi-digit counts and offsets are handled as well.
+    const separator = std.mem.indexOf(u8, result, ": ") orelse return null;
+    var suggestions = std.mem.splitSequence(u8, result[separator + 2 ..], ", ");
+    const first = suggestions.first();
+    return if (first.len == 0) null else first;
+}
+
+test "firstSuggestion returns the first near miss or null" {
+    const testing = std.testing;
+    const banner = "@(#) International Ispell Version 3.4.00 8 Feb 2015\n";
+    // misspelled word with near misses
+    try testing.expectEqualStrings("word", firstSuggestion(banner ++ "& wrod 2 1: word, rod\n\n").?);
+    // two-digit suggestion count and offset
+    try testing.expectEqualStrings("tea", firstSuggestion(banner ++ "& tae 12 10: tea, tab\n\n").?);
+    // a single suggestion has no ", " separator
+    try testing.expectEqualStrings("hello", firstSuggestion(banner ++ "& helo 1 1: hello\n\n").?);
+
+    // accepted words
+    try testing.expect(firstSuggestion(banner ++ "*\n\n") == null);
+    try testing.expect(firstSuggestion(banner ++ "+ WORD\n\n") == null);
+    // no near misses
+    try testing.expect(firstSuggestion(banner ++ "# xyzzyq 1\n\n") == null);
+    // banner only, and no output at all
+    try testing.expect(firstSuggestion(banner) == null);
+    try testing.expect(firstSuggestion("") == null);
+}