commit ba46ad17f18ed4f0afd3bfe6d924a137e5ecc42d
parent ae01224987871318eb43bb22ede4b85ea1134265
Author: Sylvia Ivory <git@sivory.net>
Date: Sat, 24 Jan 2026 16:36:53 -0800
Implement coroutines
Diffstat:
3 files changed, 208 insertions(+), 5 deletions(-)
diff --git a/build.zig b/build.zig
@@ -46,6 +46,8 @@ fn add_exe_fake_pi(exe_name: []const u8, path: std.Build.LazyPath, b: *std.Build
}
fn add_exe_real(exe_name: []const u8, path: std.Build.LazyPath, b: *std.Build) !*std.Build.Step.Compile {
+ // BCM2835 is **very specifically** the ARM1176JZF-S
+ // https://www.raspberrypi.com/documentation/computers/processors.html
var cpu_features = std.Target.arm.cpu.arm1176jzf_s.features;
cpu_features.addFeatureSet(std.Target.arm.featureSet(&[_]std.Target.arm.Feature{.strict_align}));
@@ -60,17 +62,14 @@ fn add_exe_real(exe_name: []const u8, path: std.Build.LazyPath, b: *std.Build) !
const pi = b.createModule(.{
.root_source_file = b.path("pi/root.zig"),
.target = b.graph.host,
- .optimize = .ReleaseFast,
+ .optimize = .ReleaseSafe,
});
- // BCM2835 is **very specifically** the ARM1176JZF-S
- // https://www.raspberrypi.com/documentation/computers/processors.html
const boot = b.createModule(.{
.root_source_file = path,
.target = b.resolveTargetQuery(target),
- .optimize = .ReleaseFast,
+ .optimize = .ReleaseSafe,
.unwind_tables = .none,
- .single_threaded = true,
.error_tracing = false,
.link_libc = false,
.no_builtin = true,
diff --git a/src/coroutine.zig b/src/coroutine.zig
@@ -0,0 +1,167 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+
+// tid zero is reserved for scheduler
+var tid_counter: usize = 1;
+// TODO; should we use PriorityQueue?
+var tasks: std.ArrayList(*Task) = undefined;
+var scheduler_task: *Task = undefined;
+
+const Task = struct {
+ tid: usize,
+
+ func: ?*const fn (?*anyopaque) callconv(.c) void,
+ arg: ?*anyopaque,
+
+ saved_sp: *usize,
+ // r0-r3 are caller-saved and so we don't need to store them
+ // r4-r11 are callee-saved
+ // r12 is scratch
+ // r13 is sp
+ // r14 is lr
+ // r15 is pc
+ stack: [1024]usize align(8),
+};
+
+fn alloc_id() usize {
+ const id = tid_counter;
+ tid_counter += 1;
+ return id;
+}
+
+fn push_stack(sp: *usize, value: usize) *usize {
+ const sp_new: *usize = @ptrFromInt(@intFromPtr(sp) - @sizeOf(usize));
+ sp_new.* = value;
+ return sp_new;
+}
+
+pub fn initialize(allocator: Allocator) !void {
+ tasks = try .initCapacity(allocator, 0);
+}
+
+pub fn fork(allocator: Allocator, f: *const fn (?*anyopaque) callconv(.c) void, arg: ?*anyopaque) !void {
+ var t = try allocator.create(Task);
+
+ t.tid = alloc_id();
+ t.func = f;
+ t.arg = arg;
+
+ // We should *hopefully* already be aligned to 8 bytes
+ const stack_top = @intFromPtr(&t.stack) + t.stack.len;
+ var sp: *usize = @ptrFromInt(stack_top);
+ // But might as well check for safety
+ std.debug.assertAligned(sp, .@"8");
+
+ // r4-r11
+ sp = push_stack(sp, @intFromPtr(&trampoline));
+ for (4..12) |v| {
+ sp = push_stack(sp, 15 - v);
+ }
+
+ t.saved_sp = sp;
+
+ _ = try tasks.append(allocator, t);
+}
+
+pub noinline fn join(allocator: Allocator) !void {
+ // Nothing to do
+ if (tasks.items.len == 0) return;
+
+ var t = try allocator.create(Task);
+ t.tid = 0;
+ t.saved_sp = @ptrFromInt(@frameAddress());
+
+ scheduler_task = t;
+
+ const first_task = tasks.items[0];
+
+ context_switch(&t.saved_sp, first_task.saved_sp);
+}
+
+pub fn get_current_task() *Task {
+ return tasks.items[0];
+}
+
+pub fn get_current_tid() ?usize {
+ return get_current_task().tid;
+}
+
+pub fn yield() void {
+ // We're the only thread, so we can resume operation
+ if (tasks.items.len == 1) {
+ return;
+ }
+
+ // Put ourselves into the end of the queue
+ // Context switch to the new thread
+ const old = tasks.orderedRemove(0);
+ const new = tasks.items[0];
+ _ = tasks.appendAssumeCapacity(old);
+
+ context_switch(&old.saved_sp, new.saved_sp);
+}
+
+fn exit() void {
+ // Remove task from list
+ const old = tasks.orderedRemove(0);
+ const new = if (tasks.items.len == 0) scheduler_task else tasks.items[0];
+
+ // TODO; ideally we'd dealloc old
+ // but that requires an allocator
+ context_switch(&old.saved_sp, new.saved_sp);
+}
+
+noinline fn trampoline() void {
+ const t = get_current_task();
+ if (t.func) |func| {
+ func(t.arg);
+ }
+ exit();
+}
+
+fn pop_stack(sp: **usize) usize {
+ const sp_new: *usize = @ptrFromInt(@intFromPtr(sp.*) + @sizeOf(usize));
+ const value = sp.*.*;
+ sp.* = sp_new;
+ return value;
+}
+
+// export fn dump_stack(r0: usize) callconv(.c) void {
+// // var sp: *usize = @ptrFromInt(@import("pi").get_sp());
+// var sp: *usize = @ptrFromInt(r0);
+
+// @import("devices/mini-uart.zig").writer.print("stack dump (sp = 0x{X}):\n", .{@intFromPtr(sp)}) catch {};
+
+// for (4..12) |r| {
+// const v = pop_stack(&sp);
+
+// @import("devices/mini-uart.zig").writer.print(" r{d} = 0x{X}\n", .{ r, v }) catch {};
+// }
+
+// const lr = pop_stack(&sp);
+// @import("devices/mini-uart.zig").writer.print(" lr = 0x{X}\n", .{lr}) catch {};
+// }
+
+comptime {
+ // r0 - **current_task_sp
+ // r1 - *next_task_sp
+
+ // Store lr, r4-r11 registers on stack
+ // Store sp to current_task.saved_sp
+ // Move current_task.next.saved_sp into sp
+ // Load lr, t4-r11 registers from stack into registers
+ // Return
+
+ // Remember, currently sp is of current_task's stack
+ asm (
+ \\ .global context_switch;
+ \\ .type context_switch, %function;
+ \\ context_switch:
+ \\ push {r4-r11,lr}
+ \\ str sp, [r0]
+ \\ mov sp, r1
+ \\ pop {r4-r11,lr}
+ \\ bx lr
+ );
+}
+extern fn context_switch(current_task_sp: **usize, next_task_sp: *usize) void;
diff --git a/src/labs/5-coroutine.zig b/src/labs/5-coroutine.zig
@@ -0,0 +1,37 @@
+const std = @import("std");
+
+const uart = @import("devices/mini-uart.zig");
+const coroutine = @import("coroutine.zig");
+
+export fn syscall_handler() void {}
+
+fn thread_test(arg: ?*anyopaque) callconv(.c) void {
+ _ = arg;
+
+ while (true) {
+ uart.writer.print("in thread tid={d}!\n", .{coroutine.get_current_tid().?}) catch {};
+ coroutine.yield();
+ }
+}
+
+pub fn main() !void {
+ var buffer: [1024 * 1024]u8 = undefined;
+ var fba: std.heap.FixedBufferAllocator = .init(&buffer);
+
+ uart.write_slice("initializing coroutines\n");
+ try coroutine.initialize(fba.allocator());
+
+ try coroutine.fork(fba.allocator(), thread_test, null);
+ try coroutine.fork(fba.allocator(), thread_test, null);
+ try coroutine.fork(fba.allocator(), thread_test, null);
+ try coroutine.fork(fba.allocator(), thread_test, null);
+ try coroutine.fork(fba.allocator(), thread_test, null);
+ try coroutine.fork(fba.allocator(), thread_test, null);
+ try coroutine.fork(fba.allocator(), thread_test, null);
+
+ uart.write_slice("joining coroutines\n");
+
+ try coroutine.join(fba.allocator());
+
+ uart.write_slice("done!\n");
+}