Some checks failed
Zig Project Action / Lint, Spell-check and test zig project (push) Failing after 55s
It still has a minor memory leak and has at least two hacks implemented that I would like to improve on.
396 lines
15 KiB
Zig
396 lines
15 KiB
Zig
//! Lexer for the *unified diff* format; tokenizes input sources accordingly.
|
|
/// A single lexical unit of a unified diff: its kind (`tag`) together with
/// the byte range (`loc`) it covers inside the tokenized buffer.
pub const Token = struct {
    tag: Tag,
    loc: Location,

    /// Byte range of a token inside the source buffer.
    pub const Location = struct {
        /// Offset of the first byte of the token.
        idx: usize,
        /// Number of bytes that belong to the token.
        len: usize,
    };

    pub const Tag = enum(u8) {
        /// File information; contains the content of:
        /// ```
        /// --- a/xxx
        /// +++ b/xxx
        /// ```
        /// *NOTE* includes trailing newline character
        file,
        /// Hunk header information; contains content of:
        /// ```@@ -x,y +z,y @@```
        header,
        /// may be diff content or filler content of the tools output
        content,
        /// invalid contents that could not be parsed correctly
        invalid,
        /// End of file
        eof,

        /// Representative text for tags that stand for textual content.
        /// Returns `null` for the tags that have no textual form
        /// (`.invalid` and `.eof`).
        pub fn lexeme(tag: Tag) ?[]const u8 {
            return switch (tag) {
                .header => "@@ -x,y +z,y @@",
                .content => "..",
                .file => "diff --git a/xxx b/xxx",
                // Without these prongs the switch is not exhaustive and the
                // `orelse` fallback in `symbol` could never be reached.
                .invalid, .eof => null,
            };
        }

        /// Human readable name for every tag: the lexeme where one exists,
        /// otherwise a fixed description.
        pub fn symbol(tag: Tag) []const u8 {
            return tag.lexeme() orelse switch (tag) {
                .eof => "EOF",
                .invalid => "invalid",
                // every remaining tag has a lexeme and never gets here
                else => unreachable,
            };
        }
    };
};
|
|
|
|
/// Splits a NUL-terminated buffer holding a unified diff into `Token`s.
/// The tokenizer never allocates; tokens only reference byte ranges of
/// `buffer`.
pub const Tokenizer = struct {
    /// Source text; the sentinel doubles as the end-of-input marker.
    buffer: [:0]const u8,
    /// Offset of the first byte that has not been tokenized yet.
    index: usize,

    /// For debugging purposes: prints the tag and the text of `token`.
    pub fn dump(self: *const Tokenizer, token: *const Token) void {
        print(".{s} \"{s}\"\n", .{ @tagName(token.tag), self.buffer[token.loc.idx .. token.loc.idx + token.loc.len] });
    }

    /// Creates a tokenizer positioned at the start of `buffer`.
    pub fn init(buffer: [:0]const u8) Tokenizer {
        return .{
            .buffer = buffer,
            // skip the UTF-8 BOM if present
            .index = if (mem.startsWith(u8, buffer, "\xEF\xBB\xBF")) 3 else 0,
        };
    }

    const State = enum {
        /// scanning plain content, looking for the start of a marker
        default,
        /// the current token cannot be completed
        invalid,
        /// saw a single `@`
        at_sign,
        /// saw a single `-`
        minus,
        /// inside `@@ ... @@`
        header,
        /// saw `+++`, scanning to the end of that line
        plus,
        /// saw `---`, scanning for the matching `+++`
        file,
    };

    /// Returns the next token and advances the tokenizer past it.
    /// Once the end of the buffer has been reached every further call
    /// returns a zero-length `.eof` token located at `buffer.len`.
    ///
    /// The tokens are produced by a state fsm (finite state machine)
    /// describing the syntax of the unified diff format.
    /// TODO I need to draw one for all the possible states for tokenization!
    ///   -> for that I can create test cases!
    ///   -> detect valid and invalid syntax uses! this is however the job of the parser?
    ///
    /// TODO points to improve on:
    ///   -> reduce duplicated code sections
    ///   -> make tags more explicit (i.e. remove unnecessary newlines, whitespaces, etc.)
    ///   -> streamline catching the common cases for tokens
    ///   -> reduce state machine
    ///   -> do not group tokens, instead this should be done by the parser when deriving the ast from the token stream
    ///      then the parser can identify missing parts and even point to the corresponding location in the file!
    pub fn next(this: *Tokenizer) Token {
        const token = this.next_token();
        this.index = token.loc.idx + token.loc.len;
        return token;
    }

    /// Scans one token starting at `this.index` without modifying the
    /// tokenizer. Every token except `.eof` covers at least one byte, which
    /// guarantees that `next` always makes progress.
    fn next_token(this: *const Tokenizer) Token {
        var index = this.index;
        var result: Token = .{
            .tag = undefined,
            .loc = .{
                .idx = this.index,
                .len = undefined,
            },
        };
        state: switch (State.default) {
            .default => switch (this.buffer[index]) {
                0 => if (index == this.buffer.len) {
                    if (result.loc.idx != index) {
                        // pending bytes before the end of input form a final content token
                        result.tag = .content;
                    } else {
                        return .{
                            .tag = .eof,
                            .loc = .{
                                .idx = index,
                                .len = 0,
                            },
                        };
                    }
                } else {
                    // NUL byte that is not the sentinel
                    continue :state .invalid;
                },
                '@' => continue :state .at_sign,
                '-' => continue :state .minus,
                else => {
                    index += 1;
                    continue :state .default;
                },
            },
            .invalid => {
                // Consume the offending byte unless it is the sentinel. This
                // also covers an embedded NUL; leaving it unconsumed would
                // yield a zero-length token and stall `next` forever.
                if (index < this.buffer.len) index += 1;
                result.tag = .invalid;
            },
            .at_sign => {
                index += 1;
                switch (this.buffer[index]) {
                    '@' => if (result.loc.idx != index - 1) {
                        // "@@" found after pending content: emit the content
                        // first, the header is picked up by the next call
                        index -= 1;
                        result.tag = .content;
                    } else continue :state .header,
                    else => continue :state .default,
                }
            },
            .header => {
                index += 1;
                switch (this.buffer[index]) {
                    '@' => if (this.buffer[index + 1] == '@') {
                        result.tag = .header;
                        index += 2; // include the closing "@@"
                    } else continue :state .invalid,
                    0 => continue :state .invalid,
                    else => continue :state .header,
                }
            },
            .minus => {
                index += 1;
                switch (this.buffer[index]) {
                    // assuming that we start with a minus!
                    '-' => if (this.buffer[index + 1] == '-') {
                        if (result.loc.idx != index - 1) {
                            // "---" found after pending content: emit the
                            // content first
                            index -= 1;
                            result.tag = .content;
                        } else {
                            index += 1;
                            continue :state .file;
                        }
                    } else continue :state .default,
                    // A lone '-' is ordinary content, also when it is the
                    // last byte of the input; `.default` decides between
                    // content, end of input and an embedded NUL (same as
                    // the `.at_sign` state does).
                    else => continue :state .default,
                }
            },
            .file => {
                index += 1;
                switch (this.buffer[index]) {
                    '+' => if (this.buffer[index + 1] == '+' and this.buffer[index + 2] == '+') {
                        index += 2;
                        continue :state .plus;
                    } else continue :state .file,
                    0 => continue :state .invalid,
                    else => continue :state .file,
                }
            },
            .plus => {
                index += 1;
                switch (this.buffer[index]) {
                    '\n' => {
                        index += 1; // include newline
                        result.tag = .file;
                    },
                    0 => continue :state .invalid,
                    else => continue :state .plus,
                }
            },
        }

        result.loc.len = index - result.loc.idx;
        return result;
    }
};
|
|
|
|
const std = @import("std");
|
|
const mem = std.mem;
|
|
const debug = std.debug;
|
|
const testing = std.testing;
|
|
const assert = debug.assert;
|
|
const print = debug.print;
|
|
|
|
// A diff touching one file with one hunk: the `diff`/`index` preamble is
// content, followed by the `---`/`+++` file token, the hunk header and the
// hunk body. The terminating `.eof` is not listed because `testTokenize`
// checks it on its own.
test "individual change" {
    try testTokenize(
        \\diff --git a/build.zig.zon b/build.zig.zon
        \\index 99bede4..a039487 100644
        \\--- a/build.zig.zon
        \\+++ b/build.zig.zon
        \\@@ -3,8 +3,8 @@
        \\ .version = "0.0.1",
        \\ .dependencies = .{
        \\ .zterm = .{
        \\- .url = "git+https://gitea.yves-biener.de/yves-biener/zterm#855594a8c836723f0230bfd6ad24f47613a147b1",
        \\- .hash = "zterm-0.3.0-1xmmEM8eHAB0cA7KLXGC7C8Nt7YEJcyoTme4domF1Yty",
        \\+ .url = "git+https://gitea.yves-biener.de/yves-biener/zterm#e972a2ea0f7a9f8caffd439ef206474b46475f91",
        \\+ .hash = "zterm-0.3.0-1xmmENkhHAB2rmNJFH-9rRqiRLnT673xwuMrqLwOnlT_",
        \\ },
        \\ },
        \\ .minimum_zig_version = "0.16.0-dev.1254+bf15c791f",
    , &.{ .content, .file, .header, .content });
}
|
|
|
|
// A diff touching two files with one hunk each: every file contributes a
// content (preamble), file, header and content (hunk body) token; the hunk
// body of the first file and the preamble of the second one end up in the
// same content token. The terminating `.eof` is not listed because
// `testTokenize` checks it on its own.
test "individual changes in the different files" {
    try testTokenize(
        \\diff --git a/build.zig.zon b/build.zig.zon
        \\index 99bede4..a039487 100644
        \\--- a/build.zig.zon
        \\+++ b/build.zig.zon
        \\@@ -3,8 +3,8 @@
        \\ .version = "0.0.1",
        \\ .dependencies = .{
        \\ .zterm = .{
        \\- .url = "git+https://gitea.yves-biener.de/yves-biener/zterm#855594a8c836723f0230bfd6ad24f47613a147b1",
        \\- .hash = "zterm-0.3.0-1xmmEM8eHAB0cA7KLXGC7C8Nt7YEJcyoTme4domF1Yty",
        \\+ .url = "git+https://gitea.yves-biener.de/yves-biener/zterm#e972a2ea0f7a9f8caffd439ef206474b46475f91",
        \\+ .hash = "zterm-0.3.0-1xmmENkhHAB2rmNJFH-9rRqiRLnT673xwuMrqLwOnlT_",
        \\ },
        \\ },
        \\ .minimum_zig_version = "0.16.0-dev.1254+bf15c791f",
        \\diff --git a/src/model.zig b/src/model.zig
        \\index b402c51..defd874 100644
        \\--- a/src/model.zig
        \\+++ b/src/model.zig
        \\@@ -30,3 +30,9 @@ pub const Change = struct {
        \\
        \\ const Model = @This();
        \\ const std = @import("std");
        \\+const lexer = @import("lexer.zig");
        \\+
        \\+test {
        \\+ std.testing.refAllDeclsRecursive(@This());
        \\+ _ = @import("lexer.zig");
        \\+}
    , &.{ .content, .file, .header, .content, .file, .header, .content });
}
|
|
|
|
// A diff touching one file with seven hunks: after the preamble and the file
// token every hunk contributes one header and one content token. The
// terminating `.eof` is not listed because `testTokenize` checks it on its
// own.
test "multiple changes in same file" {
    try testTokenize(
        \\diff --git a/src/queue.zig b/src/queue.zig
        \\index aae7ddf..2591b0a 100644
        \\--- a/src/queue.zig
        \\+++ b/src/queue.zig
        \\@@ -215,7 +215,7 @@ fn sleepyPop(q: *Queue(u8, 2)) !void {
        \\ // still full and the push in the other thread is still blocked
        \\ // waiting for space.
        \\ try Thread.yield();
        \\- std.Thread.sleep(std.time.ns_per_s);
        \\+ // std.Thread.sleep(std.time.ns_per_s);
        \\ // Finally, let that other thread go.
        \\ try testing.expectEqual(1, q.pop());
        \\
        \\@@ -225,7 +225,7 @@ fn sleepyPop(q: *Queue(u8, 2)) !void {
        \\ try Thread.yield();
        \\ // But we want to ensure that there's a second push waiting, so
        \\ // here's another sleep.
        \\- std.Thread.sleep(std.time.ns_per_s / 2);
        \\+ // std.Thread.sleep(std.time.ns_per_s / 2);
        \\
        \\ // Another spurious wake...
        \\ q.not_full.signal();
        \\@@ -233,7 +233,7 @@ fn sleepyPop(q: *Queue(u8, 2)) !void {
        \\ // And another chance for the other thread to see that it's
        \\ // spurious and go back to sleep.
        \\ try Thread.yield();
        \\- std.Thread.sleep(std.time.ns_per_s / 2);
        \\+ // std.Thread.sleep(std.time.ns_per_s / 2);
        \\
        \\ // Pop that thing and we're done.
        \\ try testing.expectEqual(2, q.pop());
        \\@@ -250,13 +250,13 @@ test "Fill, block, fill, block" {
        \\ const thread = try Thread.spawn(cfg, sleepyPop, .{&queue});
        \\ queue.push(1);
        \\ queue.push(2);
        \\- const now = std.time.milliTimestamp();
        \\+ // const now = std.time.milliTimestamp();
        \\ queue.push(3); // This one should block.
        \\- const then = std.time.milliTimestamp();
        \\+ // const then = std.time.milliTimestamp();
        \\
        \\ // Just to make sure the sleeps are yielding to this thread, make
        \\ // sure it took at least 900ms to do the push.
        \\- try testing.expect(then - now > 900);
        \\+ // try testing.expect(then - now > 900);
        \\
        \\ // This should block again, waiting for the other thread.
        \\ queue.push(4);
        \\@@ -270,14 +270,14 @@ test "Fill, block, fill, block" {
        \\ fn sleepyPush(q: *Queue(u8, 1)) !void {
        \\ // Try to ensure the other thread has already started trying to pop.
        \\ try Thread.yield();
        \\- std.Thread.sleep(std.time.ns_per_s / 2);
        \\+ // std.Thread.sleep(std.time.ns_per_s / 2);
        \\
        \\ // Spurious wake
        \\ q.not_full.signal();
        \\ q.not_empty.signal();
        \\
        \\ try Thread.yield();
        \\- std.Thread.sleep(std.time.ns_per_s / 2);
        \\+ // std.Thread.sleep(std.time.ns_per_s / 2);
        \\
        \\ // Stick something in the queue so it can be popped.
        \\ q.push(1);
        \\@@ -286,7 +286,7 @@ fn sleepyPush(q: *Queue(u8, 1)) !void {
        \\ try Thread.yield();
        \\ // Give the other thread time to block again.
        \\ try Thread.yield();
        \\- std.Thread.sleep(std.time.ns_per_s / 2);
        \\+ // std.Thread.sleep(std.time.ns_per_s / 2);
        \\
        \\ // Spurious wake
        \\ q.not_full.signal();
        \\@@ -317,7 +317,7 @@ test "2 readers" {
        \\ const t1 = try Thread.spawn(cfg, readerThread, .{&queue});
        \\ const t2 = try Thread.spawn(cfg, readerThread, .{&queue});
        \\ try Thread.yield();
        \\- std.Thread.sleep(std.time.ns_per_s / 2);
        \\+ // std.Thread.sleep(std.time.ns_per_s / 2);
        \\ queue.push(1);
        \\ queue.push(1);
        \\ t1.join();
        \\ );
    , &.{
        .content,
        .file,
        .header,
        .content,
        .header,
        .content,
        .header,
        .content,
        .header,
        .content,
        .header,
        .content,
        .header,
        .content,
        .header,
        .content,
    });
}
|
|
|
|
/// Runs a `Tokenizer` over `source` and compares the tag of every produced
/// token, in order, against `expected_token_tags`. The closing `.eof` token
/// must not be part of `expected_token_tags`: after the listed tags have
/// been matched this helper itself requires the next token to be `.eof`,
/// to sit at `source.len` and to have a length of zero.
///
/// On a tag mismatch the offending token and the expectation are printed
/// before the error is returned.
fn testTokenize(source: [:0]const u8, expected_token_tags: []const Token.Tag) !void {
    var tokenizer = Tokenizer.init(source);

    var position: usize = 0;
    while (position < expected_token_tags.len) : (position += 1) {
        const wanted = expected_token_tags[position];
        const got = tokenizer.next();
        testing.expectEqual(wanted, got.tag) catch |err| {
            print("Got token: ", .{});
            tokenizer.dump(&got);
            print("Expected .{s} at index {d}\n", .{ @tagName(wanted), position });
            return err;
        };
    }

    // whatever follows the expected tags has to be the end of the input
    const trailing = tokenizer.next();
    testing.expectEqual(Token.Tag.eof, trailing.tag) catch |err| {
        print("Got token: ", .{});
        tokenizer.dump(&trailing);
        print("Expected .{s}\n", .{@tagName(Token.Tag.eof)});
        return err;
    };
    try testing.expectEqual(source.len, trailing.loc.idx);
    try testing.expectEqual(0, trailing.loc.len);
}
|