#include <assert.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <string.h>

#include "connect4.h"

// Claim a free node slot and fill in its search metadata.
void node_push(Node *node,
               int32_t parent_i,
               int32_t parent_action_i,
               const Connect4State *state,
               int32_t depth) {
  assert(node->in_use == false);
  node->in_use = true;

  node->parent_i = parent_i;
  node->parent_action_i = parent_action_i;
  node->depth = depth;
  node->state = *state;

  // Actions are not computed on push.
  // They are computed by node_init_actions.
  node->actions_count = 0;
  atomic_store(&node->finished_actions_count, 0);

  for (int col = 0; col < COLS; col++) {
    node->actions[col] = 0;
    node->action_scores[col] = 0;
    node->action_pushed[col] = false;
  }
}

// Release a node slot so it can be reused.
void node_pop(Node *node) {
  assert(node->in_use == true);
  node->in_use = false;
}
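
// Node lifecycle: a slot is claimed with node_push, its legal actions are
// filled in lazily by node_init_actions the first time a worker scans it,
// and once every action has a score the slot is released with node_pop.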

// Compute the legal actions for a node, scoring any that end the game
// (or that sit on the last layer) immediately.
void node_init_actions(Node *node) {
  assert(node->in_use == true);
  assert(node->actions_count == 0);
  assert(atomic_load(&node->finished_actions_count) == 0);
  // Find valid actions.
  for (uint8_t col = 0; col < COLS; col++) {
    uint8_t current_player = node->state.current_player;
    if (connect4_check_action(&node->state, current_player, col)) {
      int32_t action_i = node->actions_count++;
      node->actions[action_i] = col;

      // Nodes on the last layer cannot be expanded further, so score
      // their actions right away.
      if (node->depth == LAYER_COUNT - 1) {
        node->action_scores[action_i] = 0;
        node->action_pushed[action_i] = true;
        atomic_fetch_add(&node->finished_actions_count, 1);
      } else {
        connect4_apply_action(&node->state, current_player, col);
        if (node->state.status.kind == CONNECT4_OVER) {
          float score;
          if (node->state.status.winner == current_player) {
            score = 1;
          } else {
            score = -1;
          }
          node->action_scores[action_i] = score;
          node->action_pushed[action_i] = true;
          atomic_fetch_add(&node->finished_actions_count, 1);
        }
        connect4_undo_action(&node->state, current_player, col);
      }
    }
  }
  assert(node->actions_count > 0);
}

// Count the actions that still need a child node in the next layer.
int32_t node_count_children_to_push(Node *node) {
  assert(node->depth < LAYER_COUNT - 1);
  int32_t children_to_push = 0;
  for (int action_i = 0; action_i < node->actions_count; action_i++) {
    if (!node->action_pushed[action_i]) {
      children_to_push++;
    }
  }
  return children_to_push;
}

// Pick the highest-scoring action of the root node. Ties are broken in
// favor of the later action because of the >= comparison.
int32_t node_best_action(Node *node) {
  assert(node->parent_i == -1);
  assert(node->parent_action_i == -1);
  assert(node->depth == 0);
  assert(atomic_load(&node->finished_actions_count) == node->actions_count);

  int best_action = node->actions[0];
  float best_score = node->action_scores[0];
  for (int action_i = 1; action_i < node->actions_count; action_i++) {
    if (node->action_scores[action_i] >= best_score) {
      best_action = node->actions[action_i];
      best_score = node->action_scores[action_i];
    }
  }

  return best_action;
}

// Score a non-root node as the average of its action scores. The caller
// negates the result when propagating to the parent, since parent and
// child are played by opposing players.
float node_score(Node *node) {
  assert(node->parent_i != -1);
  assert(node->parent_action_i != -1);
  assert(node->depth > 0);
  assert(atomic_load(&node->finished_actions_count) == node->actions_count);

  float total_score = 0;
  for (int action_i = 0; action_i < node->actions_count; action_i++) {
    total_score += node->action_scores[action_i];
  }
  float score = total_score / node->actions_count;
  return score;
}
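
// For example (hypothetical numbers): a node whose three finished actions
// scored {1, 1, -1} gets score 1/3; its parent then records -1/3 for the
// action leading to it, because a good position for this node's player is
// a bad one for the parent's player.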

void connect4_ai_action(int32_t thread_i,
                        _Atomic(int32_t) *current_layer,
                        const Connect4State *state,
                        Layers *layers,
                        NewNodes *new_nodes,
                        pthread_barrier_t *thread_barrier,
                        _Atomic(int32_t) *result) {
  assert(state->status.kind != CONNECT4_OVER);

  int rc;
  rc = pthread_barrier_wait(thread_barrier);
  if (rc == PTHREAD_BARRIER_SERIAL_THREAD) {
    // Exactly one thread initializes the shared search state.
    memset(layers, 0, sizeof(*layers));
    memset(new_nodes, 0, sizeof(*new_nodes));

    layers->layer[0].count = 1;
    Node *root = &layers->layer[0].node[0];
    node_push(root, -1, -1, state, 0);

    atomic_store(result, -1);

    atomic_store(current_layer, 0);
  } else {
    assert(rc == 0);
  }

  // @TODO: Prune on wins and losses. Their scores are precomputed in
  // node_init_actions, so when one action is already a known win there is
  // no reason to search the other actions.

  while (true) {
    rc = pthread_barrier_wait(thread_barrier);
    assert(rc == 0 || rc == PTHREAD_BARRIER_SERIAL_THREAD);

    if (atomic_load(result) != -1) {
      break;
    }

    int32_t layer_i = atomic_load(current_layer);
    assert(layer_i >= 0);
    assert(layer_i < LAYER_COUNT);
    Layer *layer = &layers->layer[layer_i];

    // Each thread gets a distinct slice of the current layer.
    int32_t values_per_thread = layer->count / THREAD_COUNT;
    int32_t leftover_count = layer->count % THREAD_COUNT;
    bool thread_has_leftover = thread_i < leftover_count;
    int32_t leftovers_before_me =
        (thread_has_leftover ? thread_i : leftover_count);
    int32_t thread_start = values_per_thread * thread_i + leftovers_before_me;
    int32_t thread_after =
        thread_start + values_per_thread + !!thread_has_leftover;
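    // For example (hypothetical numbers): with layer->count == 10 and
    // THREAD_COUNT == 4, values_per_thread is 2 and leftover_count is 2,
    // so the threads scan the ranges [0,3), [3,6), [6,8), and [8,10).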

    int32_t unpushed_child_count = 0;

    rc = pthread_barrier_wait(thread_barrier);
    assert(rc == 0 || rc == PTHREAD_BARRIER_SERIAL_THREAD);

    for (int node_i = thread_start; node_i < thread_after; node_i++) {
      Node *node = &layer->node[node_i];
      if (!node->in_use) {
        continue;
      }

      assert(node->state.status.kind != CONNECT4_OVER);

      if (node->actions_count == 0) {
        node_init_actions(node);
      }
      assert(node->actions_count > 0);

      if (atomic_load(&node->finished_actions_count) == node->actions_count) {
        if (node->depth == 0) {
          int32_t best_action = node_best_action(node);
          atomic_store(result, best_action);
        } else {
          float score = node_score(node);

          assert(node->parent_i != -1);
          Node *parent = &layers->layer[layer_i - 1].node[node->parent_i];

          assert(node->parent_action_i < parent->actions_count);
          assert(atomic_load(&parent->finished_actions_count) <
                 parent->actions_count);

          // Negate the score: what is good for this node's player is bad
          // for the parent's player.
          parent->action_scores[node->parent_action_i] = -score;
          atomic_fetch_add(&parent->finished_actions_count, 1);
        }
        node_pop(node);
      } else {
        unpushed_child_count += node_count_children_to_push(node);
      }
    }

    new_nodes->unpushed_child_count[thread_i] = unpushed_child_count;

    // On one thread, allocate each thread slots for its child nodes.
    rc = pthread_barrier_wait(thread_barrier);
    if (rc == PTHREAD_BARRIER_SERIAL_THREAD) {
      uint32_t total_unpushed_child_count = 0;

      for (int alloc_thread_i = 0; alloc_thread_i < THREAD_COUNT;
           alloc_thread_i++) {
        total_unpushed_child_count +=
            new_nodes->unpushed_child_count[alloc_thread_i];
        new_nodes->new_node_index[alloc_thread_i].count = 0;
      }

      if (layer_i == LAYER_COUNT - 1) {
        assert(total_unpushed_child_count == 0);
      } else {
        Layer *next_layer = &layers->layer[layer_i + 1];

        int32_t new_node_i = 0;

        // Scan the next layer for free slots and hand them out to the
        // threads in order.
        for (int alloc_thread_i = 0; alloc_thread_i < THREAD_COUNT;
             alloc_thread_i++) {
          while (new_nodes->new_node_index[alloc_thread_i].count <
                     new_nodes->unpushed_child_count[alloc_thread_i] &&
                 new_node_i < MAX_NODES_PER_LAYER) {
            if (!next_layer->node[new_node_i].in_use) {
              int32_t index_i =
                  new_nodes->new_node_index[alloc_thread_i].count++;
              new_nodes->new_node_index[alloc_thread_i].index[index_i] =
                  new_node_i;
            }
            new_node_i++;
          }
        }
        // Update count if we allocated nodes past the current count.
        if (new_node_i > next_layer->count) {
          next_layer->count = new_node_i;
        }
      }
    } else {
      assert(rc == 0);
    }

    // Wait for child node slots to be allocated, then push as many
    // children as were allocated.
    rc = pthread_barrier_wait(thread_barrier);
    assert(rc == 0 || rc == PTHREAD_BARRIER_SERIAL_THREAD);

    int32_t children_to_push = new_nodes->new_node_index[thread_i].count;
    int32_t children_pushed = 0;

    // children_to_push can be smaller than unpushed_child_count when the
    // next layer ran out of free slots; the remaining actions stay
    // unpushed and are retried on a later pass over this layer.
    assert(children_to_push <= unpushed_child_count);

    // Loop through the nodes a second time to push new nodes to the
    // next layer.
    for (int node_i = thread_start; node_i < thread_after; node_i++) {
      if (children_pushed == children_to_push) {
        break;
      }

      Node *node = &layer->node[node_i];
      if (!node->in_use) {
        continue;
      }
      assert(node->actions_count != 0);

      for (int action_i = 0; action_i < node->actions_count; action_i++) {
        if (!node->action_pushed[action_i]) {
          if (children_pushed == children_to_push) {
            break;
          }

          int32_t child_i =
              new_nodes->new_node_index[thread_i].index[children_pushed++];
          Node *child = &layers->layer[layer_i + 1].node[child_i];

          Connect4State child_state = node->state;
          connect4_apply_action(&child_state,
                                child_state.current_player,
                                node->actions[action_i]);

          node_push(child, node_i, action_i, &child_state, node->depth + 1);
          node->action_pushed[action_i] = true;
        }
      }
    }

    // One thread decides whether to descend (children were pushed) or to
    // back up a layer (this layer is fully expanded).
    rc = pthread_barrier_wait(thread_barrier);
    if (rc == PTHREAD_BARRIER_SERIAL_THREAD) {
      bool any_children_pushed = false;
      for (int i = 0; i < THREAD_COUNT; i++) {
        if (new_nodes->new_node_index[i].count > 0) {
          any_children_pushed = true;
          break;
        }
      }
      if (any_children_pushed) {
        atomic_store(current_layer, layer_i + 1);
      } else {
        atomic_store(current_layer, layer_i - 1);
      }
    } else {
      assert(rc == 0);
    }
  }
}

// @NOTE:
// Assumes that the barriers are already initialized and are the same
// reference across threads, and that all atomic references point to the
// same atomics across threads. These threads run in the background and
// wait for a new input to process.
// To trigger a run:
// - Set ai_turn_started and ai_turn_input.
// - Wait on the turn barrier to release the threads to begin processing.
// - Then do whatever you want.
// To check whether the run is done, look at ai_turn_completed: when it
// equals the ai_turn_started value you set, the run is done.
// To get the result, wait on the turn barrier again to be sure the result
// was set; it will then be available in ai_turn_result.
// To shut down the threads (when they are waiting for a new input, not
// working):
// - Set shutdown to 1.
// - Wait on the barrier once more to start them back up; they'll see the
//   shutdown flag and return. Then nobody will be waiting on the barrier,
//   so you can clean it and the threads up.
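//
// A minimal driver sketch of that protocol (hypothetical: assumes the
// main thread shares one Connect4ThreadState `ts` with the workers and
// that ai_turn_started/ai_turn_completed are atomic turn counters):
//
//   atomic_fetch_add(ts.ai_turn_started, 1); // New turn number.
//   // (fill in *ts.ai_turn_input with the position to search)
//   pthread_barrier_wait(ts.turn_barrier);   // Release the workers.
//   while (atomic_load(ts.ai_turn_completed) !=
//          atomic_load(ts.ai_turn_started)) {
//     // Render, poll input, etc.
//   }
//   pthread_barrier_wait(ts.turn_barrier);   // Sync on the result.
//   int32_t action = atomic_load(ts.ai_turn_result);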

void *connect4_ai_worker_thread_main(void *arg) {
  Connect4ThreadState *thread_state = (Connect4ThreadState *)arg;

  // printf("thread %i: init\n", thread_state->thread_i);

  while (true) {
    // Wait for a new AI turn.
    int rc;
    rc = pthread_barrier_wait(thread_state->turn_barrier);
    assert(rc == 0 || rc == PTHREAD_BARRIER_SERIAL_THREAD);

    if (atomic_load(thread_state->shutdown)) {
      break;
    }

    rc = pthread_barrier_wait(thread_state->thread_barrier);
    assert(rc == 0 || rc == PTHREAD_BARRIER_SERIAL_THREAD);

    // Call the function that calculates the answer.
    connect4_ai_action(thread_state->thread_i,
                       thread_state->current_layer,
                       thread_state->ai_turn_input,
                       thread_state->layers,
                       thread_state->new_nodes,
                       thread_state->thread_barrier,
                       thread_state->ai_turn_result);
    // printf("thread %i: workin\n", thread_state->thread_i);

    rc = pthread_barrier_wait(thread_state->thread_barrier);
    if (rc == PTHREAD_BARRIER_SERIAL_THREAD) {
      // Signal that we're done.
      atomic_store(thread_state->ai_turn_completed,
                   atomic_load(thread_state->ai_turn_started));
    } else {
      assert(rc == 0);
    }

    // Sync with the main thread to "return".
    rc = pthread_barrier_wait(thread_state->turn_barrier);
    assert(rc == 0 || rc == PTHREAD_BARRIER_SERIAL_THREAD);
  }

  // printf("thread %i: shutdown\n", thread_state->thread_i);

  return NULL;
}