Implement coroutines - sylveos - Toy Operating System

commit ba46ad17f18ed4f0afd3bfe6d924a137e5ecc42d
parent ae01224987871318eb43bb22ede4b85ea1134265
Author: Sylvia Ivory <git@sivory.net>
Date:   Sat, 24 Jan 2026 16:36:53 -0800

Implement coroutines

Diffstat:
M build.zig  | 9 ++++-----
A src/coroutine.zig  | 167 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/labs/5-coroutine.zig  | 37 +++++++++++++++++++++++++++++++++++++

3 files changed, 208 insertions(+), 5 deletions(-)
diff --git a/build.zig b/build.zig
@@ -46,6 +46,8 @@ fn add_exe_fake_pi(exe_name: []const u8, path: std.Build.LazyPath, b: *std.Build
 }
 
 fn add_exe_real(exe_name: []const u8, path: std.Build.LazyPath, b: *std.Build) !*std.Build.Step.Compile {
+    // BCM2835 is **very specifically** the ARM1176JZF-S
+    // https://www.raspberrypi.com/documentation/computers/processors.html
     var cpu_features = std.Target.arm.cpu.arm1176jzf_s.features;
     cpu_features.addFeatureSet(std.Target.arm.featureSet(&[_]std.Target.arm.Feature{.strict_align}));
 
@@ -60,17 +62,14 @@ fn add_exe_real(exe_name: []const u8, path: std.Build.LazyPath, b: *std.Build) !
     const pi = b.createModule(.{
         .root_source_file = b.path("pi/root.zig"),
         .target = b.graph.host,
-        .optimize = .ReleaseFast,
+        .optimize = .ReleaseSafe,
     });
 
-    // BCM2835 is **very specifically** the ARM1176JZF-S
-    // https://www.raspberrypi.com/documentation/computers/processors.html
     const boot = b.createModule(.{
         .root_source_file = path,
         .target = b.resolveTargetQuery(target),
-        .optimize = .ReleaseFast,
+        .optimize = .ReleaseSafe,
         .unwind_tables = .none,
-        .single_threaded = true,
         .error_tracing = false,
         .link_libc = false,
         .no_builtin = true,
diff --git a/src/coroutine.zig b/src/coroutine.zig
@@ -0,0 +1,167 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+
+// tid zero is reserved for scheduler
+var tid_counter: usize = 1;
+// TODO; should we use PriorityQueue?
+var tasks: std.ArrayList(*Task) = undefined;
+var scheduler_task: *Task = undefined;
+
+const Task = struct {
+    tid: usize,
+
+    func: ?*const fn (?*anyopaque) callconv(.c) void,
+    arg: ?*anyopaque,
+
+    saved_sp: *usize,
+    // r0-r3 are caller-saved and so we don't need to store them
+    // r4-r11 are callee-saved
+    // r12 is scratch
+    // r13 is sp
+    // r14 is lr
+    // r15 is pc
+    stack: [1024]usize align(8),
+};
+
+fn alloc_id() usize {
+    const id = tid_counter;
+    tid_counter += 1;
+    return id;
+}
+
+fn push_stack(sp: *usize, value: usize) *usize {
+    const sp_new: *usize = @ptrFromInt(@intFromPtr(sp) - @sizeOf(usize));
+    sp_new.* = value;
+    return sp_new;
+}
+
+pub fn initialize(allocator: Allocator) !void {
+    tasks = try .initCapacity(allocator, 0);
+}
+
+pub fn fork(allocator: Allocator, f: *const fn (?*anyopaque) callconv(.c) void, arg: ?*anyopaque) !void {
+    var t = try allocator.create(Task);
+
+    t.tid = alloc_id();
+    t.func = f;
+    t.arg = arg;
+
+    // We should *hopefully* already be aligned to 8 bytes
+    const stack_top = @intFromPtr(&t.stack) + t.stack.len;
+    var sp: *usize = @ptrFromInt(stack_top);
+    // But might as well check for safety
+    std.debug.assertAligned(sp, .@"8");
+
+    // r4-r11
+    sp = push_stack(sp, @intFromPtr(&trampoline));
+    for (4..12) |v| {
+        sp = push_stack(sp, 15 - v);
+    }
+
+    t.saved_sp = sp;
+
+    _ = try tasks.append(allocator, t);
+}
+
+pub noinline fn join(allocator: Allocator) !void {
+    // Nothing to do
+    if (tasks.items.len == 0) return;
+
+    var t = try allocator.create(Task);
+    t.tid = 0;
+    t.saved_sp = @ptrFromInt(@frameAddress());
+
+    scheduler_task = t;
+
+    const first_task = tasks.items[0];
+
+    context_switch(&t.saved_sp, first_task.saved_sp);
+}
+
+pub fn get_current_task() *Task {
+    return tasks.items[0];
+}
+
+pub fn get_current_tid() ?usize {
+    return get_current_task().tid;
+}
+
+pub fn yield() void {
+    // We're the only thread, so we can resume operation
+    if (tasks.items.len == 1) {
+        return;
+    }
+
+    // Put ourselves into the end of the queue
+    // Context switch to the new thread
+    const old = tasks.orderedRemove(0);
+    const new = tasks.items[0];
+    _ = tasks.appendAssumeCapacity(old);
+
+    context_switch(&old.saved_sp, new.saved_sp);
+}
+
+fn exit() void {
+    // Remove task from list
+    const old = tasks.orderedRemove(0);
+    const new = if (tasks.items.len == 0) scheduler_task else tasks.items[0];
+
+    // TODO; ideally we'd dealloc old
+    // but that requires an allocator
+    context_switch(&old.saved_sp, new.saved_sp);
+}
+
+noinline fn trampoline() void {
+    const t = get_current_task();
+    if (t.func) |func| {
+        func(t.arg);
+    }
+    exit();
+}
+
+fn pop_stack(sp: **usize) usize {
+    const sp_new: *usize = @ptrFromInt(@intFromPtr(sp.*) + @sizeOf(usize));
+    const value = sp.*.*;
+    sp.* = sp_new;
+    return value;
+}
+
+// export fn dump_stack(r0: usize) callconv(.c) void {
+//     // var sp: *usize = @ptrFromInt(@import("pi").get_sp());
+//     var sp: *usize = @ptrFromInt(r0);
+
+//     @import("devices/mini-uart.zig").writer.print("stack dump (sp = 0x{X}):\n", .{@intFromPtr(sp)}) catch {};
+
+//     for (4..12) |r| {
+//         const v = pop_stack(&sp);
+
+//         @import("devices/mini-uart.zig").writer.print(" r{d} = 0x{X}\n", .{ r, v }) catch {};
+//     }
+
+//     const lr = pop_stack(&sp);
+//     @import("devices/mini-uart.zig").writer.print(" lr = 0x{X}\n", .{lr}) catch {};
+// }
+
+comptime {
+    // r0 - **current_task_sp
+    // r1 - *next_task_sp
+
+    // Store lr, r4-r11 registers on stack
+    // Store sp to current_task.saved_sp
+    // Move current_task.next.saved_sp into sp
+    // Load lr, t4-r11 registers from stack into registers
+    // Return
+
+    // Remember, currently sp is of current_task's stack
+    asm (
+        \\ .global context_switch;
+        \\ .type context_switch, %function;
+        \\ context_switch:
+        \\   push {r4-r11,lr}
+        \\   str  sp, [r0]
+        \\   mov  sp, r1
+        \\   pop  {r4-r11,lr}
+        \\   bx   lr
+    );
+}
+extern fn context_switch(current_task_sp: **usize, next_task_sp: *usize) void;
diff --git a/src/labs/5-coroutine.zig b/src/labs/5-coroutine.zig
@@ -0,0 +1,37 @@
+const std = @import("std");
+
+const uart = @import("devices/mini-uart.zig");
+const coroutine = @import("coroutine.zig");
+
+export fn syscall_handler() void {}
+
+fn thread_test(arg: ?*anyopaque) callconv(.c) void {
+    _ = arg;
+
+    while (true) {
+        uart.writer.print("in thread tid={d}!\n", .{coroutine.get_current_tid().?}) catch {};
+        coroutine.yield();
+    }
+}
+
+pub fn main() !void {
+    var buffer: [1024 * 1024]u8 = undefined;
+    var fba: std.heap.FixedBufferAllocator = .init(&buffer);
+
+    uart.write_slice("initializing coroutines\n");
+    try coroutine.initialize(fba.allocator());
+
+    try coroutine.fork(fba.allocator(), thread_test, null);
+    try coroutine.fork(fba.allocator(), thread_test, null);
+    try coroutine.fork(fba.allocator(), thread_test, null);
+    try coroutine.fork(fba.allocator(), thread_test, null);
+    try coroutine.fork(fba.allocator(), thread_test, null);
+    try coroutine.fork(fba.allocator(), thread_test, null);
+    try coroutine.fork(fba.allocator(), thread_test, null);
+
+    uart.write_slice("joining coroutines\n");
+
+    try coroutine.join(fba.allocator());
+
+    uart.write_slice("done!\n");
+}

M	build.zig	\|	9	++++-----
A	src/coroutine.zig	\|	167	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	src/labs/5-coroutine.zig	\|	37	+++++++++++++++++++++++++++++++++++++