add: spellcheck utility implementation using ispell

2023-10-08 12:17:08 +02:00
parent e6a3330a61
commit e8c8fe34db
5 changed files with 211 additions and 1 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,2 @@
 zig-cache
 zig-out
--- a/README.md
+++ b/README.md
@@ -1,3 +1,19 @@
 # sp
-Spellchecking utility to provide automatic corrected spelling for the provided input using gnu/linux spell.
+Spellchecking utility that provides the automatically corrected spelling of the provided input using GNU/Linux ispell.
 ## Installation
 Build the tool using the following command:
 ```bash
 zig build -Doptimize=ReleaseFast
 ```
 This was built using Zig 0.12.0.dev but should work with the most recent version of Zig.
 Add the built `sp` executable to your `PATH` environment variable to easily invoke the tool.
 ## Usage
 `sp <word>` checks the provided _word_ and returns its correct spelling. In case of a misspelling, the first suggestion of ispell is automatically chosen and returned.
--- a/build.zig
+++ b/build.zig
@@ -0,0 +1,70 @@
 const std = @import("std");
 // `build` does not compile anything itself: it only describes a graph of
 // steps, which an external build runner executes afterwards.
 pub fn build(b: *std.Build) void {
    // Target and optimization mode are left to whoever runs `zig build`:
    // no target is excluded (native is the default) and no release mode
    // (Debug, ReleaseSafe, ReleaseFast, ReleaseSmall) is preferred.
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});
    // The `sp` executable, rooted at src/main.zig.
    const sp_exe = b.addExecutable(.{
        .name = "sp",
        .root_source_file = .{ .path = "src/main.zig" },
        .target = target,
        .optimize = optimize,
    });
    // Register the executable with the "install" step, which is the default
    // step of a plain `zig build`.
    b.installArtifact(sp_exe);
    // `zig build run [-- arg1 arg2 ...]`: run the executable. Depending on the
    // install step makes it run from the installation directory instead of
    // the cache directory; extra command-line arguments are forwarded as-is.
    const run_sp = b.addRunArtifact(sp_exe);
    run_sp.step.dependOn(b.getInstallStep());
    if (b.args) |forwarded_args| run_sp.addArgs(forwarded_args);
    b.step("run", "Run the app").dependOn(&run_sp.step);
    // `zig build test`: `addTest` only builds the test executable, so a
    // separate run artifact is needed to actually execute the unit tests.
    const main_tests = b.addTest(.{
        .root_source_file = .{ .path = "src/main.zig" },
        .target = target,
        .optimize = optimize,
    });
    const run_main_tests = b.addRunArtifact(main_tests);
    b.step("test", "Run unit tests").dependOn(&run_main_tests.step);
 }
--- a/1
+++ b/1
@@ -0,0 +1 @@
 zig-out/bin/sp
--- a/src/main.zig
+++ b/src/main.zig
@@ -0,0 +1,121 @@
 const std = @import("std");
 const heap = std.heap;
 const os = std.os;
 /// Spell-checks one word with `ispell -a` and prints the result to stdout.
 ///
 /// The word is the first line of stdin when stdin is a pipe, otherwise the
 /// first command-line argument. If ispell reports near misses (an `&` result
 /// line) the first suggestion is printed; in every other case (word accepted,
 /// or unknown with no suggestions) the word is printed unchanged. No trailing
 /// newline is written.
 ///
 /// Errors: `error.MissingWord` when no (non-blank) word was supplied,
 /// `error.IspellFailed` when ispell terminated abnormally without producing a
 /// result, plus any error from spawning the child or from I/O.
 pub fn main() !void {
    var gpa = heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    // Every allocation below comes from the arena and is released at once on exit.
    var arena = heap.ArenaAllocator.init(gpa.allocator());
    defer arena.deinit();
    const allocator = arena.allocator();
    // Upper bound on the bytes collected from each of ispell's output streams.
    const max_output_bytes = 64 * 1024;
    const stdin_file = std.io.getStdIn();
    const stat = try stdin_file.stat();
    var raw_word: ?[]const u8 = null;
    switch (stat.kind) {
        .named_pipe => {
            if (try stdin_file.reader().readUntilDelimiterOrEofAlloc(allocator, '\n', 1024)) |input| {
                raw_word = input;
            }
        },
        else => {
            var arg_iterator = try std.process.argsWithAllocator(allocator);
            // skip own executable name
            _ = arg_iterator.next();
            if (arg_iterator.next()) |input| {
                raw_word = input;
            }
        },
    }
    // Reject missing or blank input instead of handing an undefined slice to ispell.
    const word = std.mem.trim(u8, raw_word orelse "", " \t\r\n");
    if (word.len == 0) {
        std.debug.print("usage: sp <word>\n", .{});
        return error.MissingWord;
    }
    // start ispell process
    var process = std.ChildProcess.init(&.{ "ispell", "-a" }, allocator);
    process.stdin_behavior = .Pipe;
    process.stdout_behavior = .Pipe;
    process.stderr_behavior = .Pipe;
    var stdout = std.ArrayList(u8).init(allocator);
    var stderr = std.ArrayList(u8).init(allocator);
    defer {
        stdout.deinit();
        stderr.deinit();
    }
    try process.spawn();
    // Take ownership of the child's stdin so it is closed exactly once, here,
    // and `wait` does not try to close it again.
    const child_stdin = process.stdin.?;
    process.stdin = null;
    {
        // Closing the pipe tells ispell that the input is complete.
        defer child_stdin.close();
        // The leading '^' makes ispell treat the rest of the line as text to
        // check, so a word starting with '*', '@', '#', ... is never run as
        // an ispell command.
        try child_stdin.writeAll("^");
        try child_stdin.writeAll(word);
        try child_stdin.writeAll("\n");
    }
    // collecting the resulting output
    try process.collectOutput(&stdout, &stderr, max_output_bytes);
    const term = try process.wait();
    const exited_cleanly = switch (term) {
        .Exited => |code| code == 0,
        else => false,
    };
    const maybe_result = firstResultLine(stdout.items);
    if (maybe_result == null and !exited_cleanly) {
        std.debug.print("ispell failed: {s}\n", .{stderr.items});
        return error.IspellFailed;
    }
    // Fall back to the input when ispell accepted the word, had nothing to
    // check, or could not offer any replacement.
    const corrected: []const u8 = if (maybe_result) |line|
        (firstSuggestion(line) orelse word)
    else
        word;
    // prepare for writing output to stdout
    const stdout_file = std.io.getStdOut().writer();
    var bw = std.io.bufferedWriter(stdout_file);
    const stdout_writer = bw.writer();
    try stdout_writer.print("{s}", .{corrected});
    try bw.flush(); // don't forget to flush!
 }
 /// Returns the first result line in ispell's `-a` output, skipping blank
 /// separator lines and the version banner (which starts with '@'), or null
 /// when the output contains no result line. The slice points into `output`.
 fn firstResultLine(output: []const u8) ?[]const u8 {
    var lines = std.mem.tokenizeScalar(u8, output, '\n');
    while (lines.next()) |raw_line| {
        const line = std.mem.trim(u8, raw_line, " \t\r");
        if (line.len == 0 or line[0] == '@') continue;
        return line;
    }
    return null;
 }
 /// Extracts the first near miss from an ispell result line of the form
 /// `& original count offset: miss, miss, ...`. Returns null for every other
 /// kind of line (`*`, `+`, `-`, `#`, `?`) or when the list is empty. The
 /// suggestion is located via the ':' and ',' delimiters, so multi-digit
 /// counts and offsets are handled. The slice points into `result_line`.
 fn firstSuggestion(result_line: []const u8) ?[]const u8 {
    if (result_line.len == 0 or result_line[0] != '&') return null;
    const colon_idx = std.mem.indexOfScalar(u8, result_line, ':') orelse return null;
    var candidates = std.mem.splitScalar(u8, result_line[colon_idx + 1 ..], ',');
    const first = std.mem.trim(u8, candidates.first(), " \t\r");
    return if (first.len == 0) null else first;
 }