#define _GNU_SOURCE

#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include "spinlock.h"

/* Nanoseconds in one second; unsigned long long so products stay 64-bit. */
#define NSECS_PER_SEC		(1000000000ULL)
/* Length of one timed run, in seconds. */
#define TEST_DURATION_SECS	10

/* The lock under test; every worker contends on this single instance. */
static spinlock_t lock = SPIN_LOCK_UNLOCKED;

/*
 * Stop flag: set from the SIGALRM handler, polled by the workers and cleared
 * again by run_test() once a run is over. It is written from a signal
 * handler, so it uses volatile sig_atomic_t, the object type ISO C permits
 * a handler to assign to.
 */
static volatile sig_atomic_t exiting = 0;

/*
 * Per-thread CPU number.
 * NOTE(review): presumably what smp_processor_id() from spinlock.h expands
 * to -- confirm against that header.
 */
__thread int __smp_processor_id;

/*
 * Per-worker state handed to thread_fn(). run_test() fills in cs_time and id
 * before spawning the worker and reads acquisitions back after joining it.
 */
struct thread_arg {
	/* How long the worker holds the lock per acquisition, in nanoseconds. */
	unsigned long long cs_time;
	/* Number of times this worker acquired the lock. */
	unsigned long long acquisitions;
	/*
	 * Worker index; stored through smp_processor_id() by the worker and
	 * equal to the CPU index run_test() requests affinity for.
	 */
	int id;
};

/*
 * Worker thread body.
 *
 * @_arg points at this worker's struct thread_arg. Until 'exiting' is
 * observed set, the worker repeatedly takes the lock, counts the acquisition
 * and busy-waits on CLOCK_MONOTONIC until at least cs_time nanoseconds have
 * elapsed before dropping the lock again.
 *
 * Always returns NULL; the acquisition count is reported through the
 * argument structure.
 */
static void *thread_fn(void *_arg)
{
	struct thread_arg *self = _arg;
	const unsigned long long hold_ns = self->cs_time;
	unsigned long long start_ns, now_ns;
	struct timespec now;

	smp_processor_id() = self->id;
	self->acquisitions = 0;

	while (!exiting) {
		spin_lock(&lock);
		self->acquisitions++;

		/* Timestamp the start of the critical section. */
		if (clock_gettime(CLOCK_MONOTONIC, &now))
			perror("failed to get time");
		start_ns = (now.tv_sec * NSECS_PER_SEC) + now.tv_nsec;

		/* Spin with the lock held until the requested time has passed. */
		for (now_ns = start_ns; now_ns - start_ns < hold_ns; ) {
			if (clock_gettime(CLOCK_MONOTONIC, &now))
				perror("failed to get time");
			now_ns = (now.tv_sec * NSECS_PER_SEC) + now.tv_nsec;
		}

		spin_unlock(&lock);
		/*
		 * TODO: play with nanosleep() here to avoid MCS queueing
		 * with two threads
		 */
	}

	return NULL;
}

/*
 * Handler run_test() installs for SIGALRM: raises the 'exiting' flag so the
 * worker loops terminate. The signal number itself is not needed.
 */
static void alrm_handler(int signo)
{
	(void)signo;

	exiting = 1;
}

/*
 * Run one TEST_DURATION_SECS-long contention test and print the results.
 *
 * Spawns @nthreads workers running thread_fn() and requests that worker i be
 * bound to CPU i. The lock is held while the workers are created and the
 * timer is armed, so no worker can enter its critical section before setup
 * has finished. A one-shot CLOCK_MONOTONIC timer is used to end the run via
 * the SIGALRM handler, which sets 'exiting'.
 *
 * @nthreads: number of workers; must be non-zero because it sizes the
 *            variable-length arrays below.
 * @cs_time:  critical section length, in nanoseconds, given to each worker.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if setup failed. On failure the
 * workers spawned so far are told to exit and joined, the lock is released
 * and the timer is deleted before returning, so nothing is left running
 * against this function's stack.
 */
static int run_test(unsigned long nthreads, unsigned long long cs_time)
{
	int i, err, nspawned = 0, ret = EXIT_FAILURE;
	timer_t timer;
	unsigned long long acquisitions = 0, overhead = 0, busy;
	const unsigned long long total = TEST_DURATION_SECS * NSECS_PER_SEC;
	pthread_t threads[nthreads];
	struct thread_arg args[nthreads];
	const struct itimerspec ispec = {
		.it_value = (struct timespec) {
			.tv_sec = TEST_DURATION_SECS,
		},
	};

	if (timer_create(CLOCK_MONOTONIC, NULL, &timer)) {
		perror("failed to create timer");
		return EXIT_FAILURE;
	}

	if (signal(SIGALRM, alrm_handler) == SIG_ERR) {
		perror("failed to install SIGALRM handler");
		goto out_timer;
	}

	/* Hold the workers back until everything is set up. */
	spin_lock(&lock);
	for (i = 0; i < nthreads; ++i) {
		cpu_set_t cpuset;

		args[i] = (struct thread_arg) {
			.cs_time	= cs_time,
			.acquisitions	= 0,
			.id		= i,
		};

		/*
		 * The pthread_*() calls return the error number instead of
		 * setting errno, so report it with strerror(), not perror().
		 */
		err = pthread_create(&threads[i], NULL, thread_fn, &args[i]);
		if (err) {
			fprintf(stderr, "failed to spawn worker thread: %s\n",
				strerror(err));
			goto out_abort;
		}
		nspawned++;

		CPU_ZERO(&cpuset);
		CPU_SET(i, &cpuset);
		err = pthread_setaffinity_np(threads[i], sizeof(cpuset), &cpuset);
		if (err) {
			fprintf(stderr, "failed to set thread affinity: %s\n",
				strerror(err));
			goto out_abort;
		}
	}

	if (timer_settime(timer, 0, &ispec, NULL)) {
		perror("failed to arm timer");
		goto out_abort;
	}
	spin_unlock(&lock);

	for (i = 0; i < nthreads; ++i) {
		err = pthread_join(threads[i], NULL);
		if (err)
			fprintf(stderr, "failed to join worker thread: %s\n",
				strerror(err));
		printf("Thread %d got %llu acquisitions\n", i, args[i].acquisitions);
		acquisitions += args[i].acquisitions;
	}

	/*
	 * Workers only check 'exiting' between acquisitions, so the last
	 * critical section can run past the deadline and the accounted time
	 * can exceed the nominal duration. Report zero overhead in that case
	 * rather than letting the unsigned subtraction wrap.
	 */
	busy = acquisitions * cs_time;
	if (busy < total)
		overhead = total - busy;
	printf("--------\nTotal number of acquisitions in %d seconds: %llu (overhead %lluns [%f%%])\n",
		TEST_DURATION_SECS, acquisitions, overhead, ((double)overhead / total) * 100);
	ret = EXIT_SUCCESS;
	goto out_timer;

out_abort:
	/* Let the workers spawned so far run to completion before leaving. */
	exiting = 1;
	spin_unlock(&lock);
	for (i = 0; i < nspawned; ++i)
		pthread_join(threads[i], NULL);
out_timer:
	timer_delete(timer);
	exiting = 0;
	fflush(stdout);
	return ret;
}

/*
 * Entry point.
 *
 * Usage: <prog> <nthreads> [cs time (ns)]
 *
 * Parses the thread count (capped at MAX_THREADS) and the optional critical
 * section length (default 500ns), then calls run_test() five times, stopping
 * at the first failure. Returns the status of the last run, or EXIT_FAILURE
 * when the arguments are missing or malformed.
 */
int main(int argc, char **argv)
{
	int i, ret = EXIT_SUCCESS;
	unsigned long nthreads;
	unsigned long long cs_time = 500;
	char *end;

	switch (argc) {
	case 3:
		cs_time = strtoull(argv[2], &end, 0);
		if (end == argv[2] || *end != '\0') {
			fprintf(stderr, "Invalid critical section time '%s'\n",
				argv[2]);
			return EXIT_FAILURE;
		}
		/* fallthrough */
	case 2:
		nthreads = strtoul(argv[1], &end, 0);
		/*
		 * Zero threads must be rejected: run_test() sizes its
		 * variable-length arrays with this value.
		 */
		if (end == argv[1] || *end != '\0' || nthreads == 0) {
			fprintf(stderr, "Invalid number of threads '%s'\n",
				argv[1]);
			return EXIT_FAILURE;
		}
		if (nthreads > MAX_THREADS) {
			nthreads = MAX_THREADS;
			fprintf(stderr, "Capping number of threads to %lu\n", nthreads);
		}
		break;
	default:
		fprintf(stderr, "Usage: %s <nthreads> [cs time (ns)]\n",
			argv[0]);
		return EXIT_FAILURE;
	}

	printf("Running %s spinlock test with %lu threads, critical section of %lluns\n",
		LOCK_NAME, nthreads, cs_time);

	for (i = 0; i < 5; ++i) {
		ret = run_test(nthreads, cs_time);
		if (ret)
			break;
	}

	return ret;
}
