#include <windows.h>
#include <stdio.h>
#include <new>
#include "Sync.h"

#define MAX_SIZE 13
// #define DEBUG

// Fixed-capacity stack of ints used as a work list by the search.
// No bounds checking is performed: callers must keep 0 <= pos <= MAX_SIZE.
class Work {
public:
  int cand[MAX_SIZE];  // stored values; only cand[0 .. pos-1] are meaningful
  int pos;             // number of stored values (index of the next free slot)

  // Starts out empty; cand[] is deliberately left uninitialised.
  Work() : pos(0) {}

  // Removes and returns the most recently pushed value.
  int pop(){ return cand[--pos]; }

  // Stores val on top of the stack.
  void push(int val){ cand[pos++] = val; }
};

// One result of the search, stored as a node of a singly linked list.
// Nodes are obtained from pards_shmalloc and released with pards_shmfree
// (presumably PARDS shared memory, so that results built by spawned workers
// are visible to the reader -- TODO confirm against the PARDS documentation).
class AnsList{
public:
  int ans[MAX_SIZE];  // values copied from a Work by WorkToAnsList
  AnsList* next;      // following result, or 0 at the end of the list

  // A fresh node is always a valid one-element list; appendAnsList walks
  // `next` until it finds 0, so it must never be left uninitialised.
  AnsList() : next(0) {}

  // Allocates the node through pards_shmalloc.
  // Throws std::bad_alloc if the allocator hands back a null pointer: a
  // throwing allocation function must not return null, otherwise the
  // constructor would run on a null object.
  static void* operator new(size_t size){
    void* ptr = pards_shmalloc(size);
    if(ptr == 0) throw std::bad_alloc();
    return ptr;
  }

  // Releases the node through pards_shmfree. Deleting a null pointer is a
  // no-op, as required of a deallocation function.
  static void operator delete(void* obj){
    if(obj == 0) return;
    pards_shmfree(obj);
  }
};

// Concatenates two result lists in place and returns the head of the
// combined list. When a is empty the result is simply b; otherwise b is
// hooked onto the last node of a and a is returned.
AnsList* appendAnsList(AnsList* a, AnsList* b){
  if(!a) return b;

  // Walk to the last node of a.
  AnsList* last = a;
  for(; last->next; last = last->next) {}

  last->next = b;
  return a;
}

// Copies the w->pos values currently stored in w into a->ans, keeping their
// positions. Entries of a->ans at index w->pos and above are left untouched,
// and a->next is not modified.
void WorkToAnsList(Work* w, AnsList* a){
  for(int k = w->pos - 1; k >= 0; --k)
    a->ans[k] = w->cand[k];
}

// Fills a3 with the contents of a1 followed by the contents of a2.
// a3's previous contents are overwritten; a1 and a2 are only read.
// The caller must ensure a1->pos + a2->pos fits in a Work (MAX_SIZE).
void append(Work* a1, Work* a2, Work* a3){
  int out = 0;  // next write index in a3

  for(int k = 0; k < a1->pos; ++k)
    a3->cand[out++] = a1->cand[k];

  for(int k = 0; k < a2->pos; ++k)
    a3->cand[out++] = a2->cand[k];

  a3->pos = out;
}

// Initialises a to hold the values 1, 2, ..., n in that order (n ends up on
// top of the stack). For n <= 0 the work list becomes empty.
// No bound is checked here: the caller must ensure n <= MAX_SIZE.
void generate(Work* a, int n){
  int filled = 0;
  while(filled < n){
    a->cand[filled] = filled + 1;
    ++filled;
  }
  a->pos = filled;
}

// Variant of qu(Work*, Work*, Work*) that delivers its result through
// ans.write() instead of a return value and hands the "skip this candidate"
// branch to SPAWN.
//   plu - candidates not yet tried at this level; one value is popped from it
//   ls  - candidates already skipped at this level; the popped value is pushed
//   lp  - values chosen so far; only read here
//   ans - receives the head of the resulting list (0 when there is no result)
// NOTE(review): SPAWN and Sync<> come from Sync.h / PARDS and are not visible
// here; presumably SPAWN runs the call concurrently and read() blocks until
// the matching write() -- confirm against the PARDS documentation.
void qudist(Work* plu, Work* ls, Work* lp, Sync<AnsList*> ans){
  if(plu->pos == 0) {
    if(ls->pos == 0) {
      // Nothing left to try and nothing skipped: lp is a complete result.
      AnsList* res = new AnsList;
      WorkToAnsList(lp,res);
      res->next = 0;
      ans.write(res);
      return;
    } else {
      // Candidates were skipped but none remain to try: no result here.
      ans.write(0);
      return;
    }
  } else {
    // check() is defined later in the file; declare it locally.
    AnsList* check(int, int, Work*, Work*, Work*);

    int p = plu->pop();
    Work* lu = plu;        // plu without p (same object, already popped)
    Work lr;
    append(lu,ls,&lr);     // lr = every candidate other than p
    Work lp2 = *lp;        // private copy of lp; check() pops values from it

    // lr was built above, before p joins the skipped list.
    ls->push(p);

    // Branch that skips p: continue with the remaining candidates.
    Sync<AnsList*> syncans2;
    SPAWN(qudist(lu, ls, lp, syncans2));
   
    // Branch that tries p; evaluated here while the spawned call proceeds.
    AnsList* ans1 = check(p, 1, &lr, &lp2, lp);
    AnsList* ans2 = syncans2.read();

    // Results that use p come first, followed by those from the spawned call.
    ans.write(appendAnsList(ans1, ans2));
  }
}

// Serial search step. Returns the list of all results reachable from the
// given state, or 0 when there are none.
//   plu - candidates not yet tried at this level (values are popped from it)
//   ls  - candidates already skipped at this level (values are pushed onto it)
//   lp  - values chosen so far (only read)
// Both plu and ls are modified in place.
AnsList* qu(Work* plu, Work* ls, Work* lp){
  // check() is defined later in the file; declare it locally.
  AnsList* check(int, int, Work*, Work*, Work*);

  if(plu->pos != 0) {
    const int p = plu->pop();

    // Every candidate other than p: what is left in plu plus the skipped ones.
    Work others;
    append(plu, ls, &others);

    // check() pops values from its fourth argument, so give it a copy of lp.
    Work chosenCopy = *lp;

    AnsList* viaP = check(p, 1, &others, &chosenCopy, lp);

    // Now skip p at this level and carry on with the remaining candidates.
    ls->push(p);
    AnsList* skippingP = qu(plu, ls, lp);

    return appendAnsList(viaP, skippingP);
  }

  // No candidate left to try: a dead end if any were skipped ...
  if(ls->pos != 0) return 0;

  // ... otherwise lp is a complete result.
  AnsList* found = new AnsList;
  WorkToAnsList(lp, found);
  found->next = 0;
  return found;
}

// Tests candidate p against the values in qlp0, most recently pushed first,
// using a distance that starts at d and grows by one per value examined.
// If some value q satisfies q + d == p or q - d == p the candidate is
// rejected and 0 is returned. Otherwise p is pushed onto a copy of lp and the
// search continues via qu() with l as the new candidate list and an empty
// skipped list.
// qlp0 is consumed (popped until empty or until a conflict is found);
// lp itself is not modified.
AnsList* check(int p, int d, Work* l, Work* qlp0, Work* lp){
  for(; qlp0->pos != 0; ++d){
    const int q = qlp0->pop();
    if(q + d == p || q - d == p)
      return 0;
  }

  Work extended = *lp;
  extended.push(p);
  Work noneSkipped;
  return qu(l, &noneSkipped, &extended);
}

//int main(int argc, char* argv[])
int qu(int num)
{
/*  if(argc < 2) {
    printf("qu SIZE\n");
    exit(0);
  }

  int num = atoi(argv[1]);

  if(num > MAX_SIZE) {
    printf("SIZE is greater than MAX_SIZE (%d).\n", MAX_SIZE);
    exit(0);
  }
*/
  pards_init();

  DWORD t1, t2;
  int i;

  AnsList* res;

  printf("serial version\n");
  t1 = GetTickCount();
  Work gen1;
  generate(&gen1, num);
  Work w1, w2;
  res = qu(&gen1,&w1,&w2);
  t2 = GetTickCount();

  for(i = 0; res != 0; res = res->next, i++){
#ifdef DEBUG
    for(int j = 0; j < num; j++)
      printf("%d ", res->ans[j]);
    printf("\n");
#endif
  }
  printf("num = %d\n",i);
  printf("elapsed time = %f sec\n",
	 (double)(t2 - t1)/1000.0);

  printf("parallel version\n");
  t1 = GetTickCount();
  Work gen2;
  generate(&gen2, num);
  Work w3, w4;
  Sync<AnsList*> ans;
  qudist(&gen2,&w3,&w4,ans);
  res = ans.read();
  t2 = GetTickCount();
  
  for(i = 0; res != 0; res = res->next, i++){
#ifdef DEBUG
    for(int j = 0; j < num; j++)
      printf("%d ", res->ans[j]);
    printf("\n");
#endif
  }
  printf("num = %d\n",i);
  printf("elapsed time = %f sec\n",
	 (double)(t2 - t1)/1000.0);

  pards_finalize();
  return 0;
}

#if 0
% Erlang version
-module(qu).
-export([qu/1, go/1, count/1]).

go(N) -> timer:tc(qu,count,[qu(N)]).

count(C) -> count(C,0).
count([_H|T],Crnt) -> count(T,Crnt+1);
count([],Crnt) -> Crnt.

append([A|X],Y) -> [A|append(X,Y)];
append([],Y) -> Y.

qu(N) -> qu(generate(N), [],[]).

generate(N) ->
        if N > 0 -> [N|generate(N - 1)];
           N == 0 -> []
        end.

qu([P|Lu],Ls,Lp) -> 
        Lr = append(Lu,Ls),
        Ans1 = check(P,1,Lr,Lp,Lp),
        Ans2 = qu(Lu,[P|Ls],Lp),
        append(Ans1, Ans2);
qu([],[_|_],_) -> [];
qu([],[],Lp) -> [Lp].

check(P,D,L,[Q|Lp0],Lp) ->
        if (Q + D == P) or (Q - D == P) -> [];
           true -> D1 = D + 1, check(P,D1,L,Lp0,Lp)
	end;
check(P,_,L,[],Lp) -> qu(L,[],[P|Lp]).
#endif
#if 0
% Erlang parallel version
-module(qupar).
-export([qu/1, qudist/4, go/1, count/1,qucount/1]).

go(N) -> timer:tc(qupar,qucount,[N]).
qucount(N)-> count(qu(N)).

count(C) -> count(C,0).
count([_H|T],Crnt) -> count(T,Crnt+1);
count([],Crnt) -> Crnt.

append([A|X],Y) -> [A|append(X,Y)];
append([],Y) -> Y.

qu(N) ->
      spawn(qupar,qudist,[generate(N), [], [], self()]),
      receive
        Ans -> Ans
      end.

generate(N) ->
        if N > 0 -> [N|generate(N - 1)];
           N == 0 -> []
        end.

qudist([P|Lu],Ls,Lp,Pid) -> 
	spawn(qupar,qudist,[Lu,[P|Ls],Lp,self()]),
        Lr = append(Lu,Ls),
        Ans1 = check(P,1,Lr,Lp,Lp),
	receive
	  Ans2 -> Ans = append(Ans1, Ans2)
        end, 
	Pid ! Ans;
qudist([],[_|_],_,Pid) -> Pid ! [];
qudist([],[],Lp,Pid) -> Pid ! [Lp].

qu([P|Lu],Ls,Lp) -> 
        Lr = append(Lu,Ls),
        Ans1 = check(P,1,Lr,Lp,Lp),
        Ans2 = qu(Lu,[P|Ls],Lp),
        append(Ans1, Ans2);
qu([],[_|_],_) -> [];
qu([],[],Lp) -> [Lp].

check(P,D,L,[Q|Lp0],Lp) ->
        if (Q + D == P) or (Q - D == P) -> [];
           true -> D1 = D + 1, check(P,D1,L,Lp0,Lp)
	end;
check(P,_,L,[],Lp) -> qu(L,[],[P|Lp]).
#endif
