(**************************************************************************)
(*                                 Cmmtest                                *)
(*                                                                        *)
(*   Robin Morisset, ENS & INRIA Paris-Rocquencourt                       *)
(*   Pankaj Pawan, IIT Kanpur & INRIA Paris-Rocquencourt                  *)
(*   Francesco Zappa Nardelli, INRIA Paris-Rocquencourt                   *)
(*                                                                        *)
(*  The Cmmtest tool is copyright 2012, 2013 Institut National de         *)
(*  Recherche en Informatique et en Automatique (INRIA).                  *)
(*                                                                        *)
(*  Redistribution and use in source and binary forms, with or without    *)
(*  modification, are permitted provided that the following conditions    *)
(*  are met:                                                              *)
(*  1. Redistributions of source code must retain the above copyright     *)
(*  notice, this list of conditions and the following disclaimer.         *)
(*  2. Redistributions in binary form must reproduce the above copyright  *)
(*  notice, this list of conditions and the following disclaimer in the   *)
(*  documentation and/or other materials provided with the distribution.  *)
(*  3. The names of the authors may not be used to endorse or promote     *)
(*  products derived from this software without specific prior written    *)
(*  permission.                                                           *)
(*                                                                        *)
(*  THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS    *)
(*  OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED     *)
(*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE    *)
(*  ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY       *)
(*  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL    *)
(*  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE     *)
(*  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS         *)
(*  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER  *)
(*  IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR       *)
(*  OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN   *)
(*  IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                         *)
(*                                                                        *)
(**************************************************************************)

open Str
open Types
open Util

(* *** trace marking algorithm *** *)

let opt_debug = ref false      (* INTERFACE *)

(* compares that the values x,y match for the minimum of size_x and size_y*) 
let compare_value x1 y1 = 
  match x1, y1 with
    | NonPointer x, NonPointer y -> x = y
    | Pointer x, Pointer y -> x = y
    | _ , _ -> false

let dump_table table st =
  print_endline "--------";
  Hashtbl.iter 
    (fun l (s,a,v,rel) ->
      let loc = string_of_location st l in
      if loc.[0] = 'g' || loc.[0] = 'a' then
        Printf.printf "%s: %d %b\n"
          loc s rel
    ) table;
  print_endline "--------"

let set_release table =
  (* question for OCaml expert, do we have to copy the table here? *)
  let table_copy = Hashtbl.copy table in
  Hashtbl.iter 
    (fun k (a,v,rel) -> 
      Hashtbl.replace table k (a,v,true))
    table_copy

let set_acquire table =
  (* if the release bit is set, then kill the dataflow, otherwise noop *)
  let table_copy = Hashtbl.copy table in
  Hashtbl.iter 
    (fun k (a,v,rel) -> 
      if rel then Hashtbl.remove table k)
    table_copy

let set_release_OW table =
  (* question for OCaml expert, do we have to copy the table here? *)
  let table_copy = Hashtbl.copy table in
  Hashtbl.iter 
    (fun k (ptr,ri,rel) -> 
      Hashtbl.replace table k (ptr,ri,true))
    table_copy

let set_acquire_OW table =
  (* if the release bit is set, then kill the dataflow, otherwise noop *)
  let table_copy = Hashtbl.copy table in
  Hashtbl.iter 
    (fun k (_,_,rel) -> 
      if rel then Hashtbl.remove table k)
    table_copy

(* for each location, var_addresses records the current value, 
 , if the last operation was a write or read, and if a release
   action has been encountered since the last access. *)
let var_addresses = (Hashtbl.create 15 : (loc_bo, (access * value * bool)) Hashtbl.t) 

let dump_var_addresses st =
  print_endline "--------";
  Hashtbl.iter (fun l (a,v,b) -> 
    let loc = string_of_loc_bo st l in
    if loc.[0] = 'g' || loc.[0] = 'a' then
    Printf.printf ": %s : %s - %s - %b\n" 
      loc (match a with R -> "R" | W -> "W" | I -> "I") (string_of_value v st) b)
    var_addresses;
  print_endline "--------"
      
let rec mark trace irreads (loaddeps:(int*int list)list) aft_sc st 
    : annot_event list = 
  match trace with
    | [] -> 
      []
    | ah :: t -> 
      ( (* dump_var_addresses st; *)
        match ah.evt with
      | Init (l,v,s) -> 
          let l = loc_bo_of_loc l in
          (* print_endline ("added "^(string_of_location l st)); *)
          Hashtbl.replace var_addresses l (I, v, false);
          ah :: (mark t irreads loaddeps false st)
      | Store (l,v,s) ->
          let l = loc_bo_of_loc l in
          (* print_endline ("\nmarking write "^(string_of_location st l)); *)
          if (List.length irreads > 0) && (ah.split_index == List.hd irreads) then begin
            Printf.printf "\nIR format**** split_index:%d  hd:%d\n" ah.split_index (List.hd irreads);
            assert(false);
          end;
          if Hashtbl.mem var_addresses l then
            let (op,oldV,rel) = Hashtbl.find var_addresses l in 
            match op with
              | R -> 
                if compare_value v oldV then begin 
                  ah.redundant := WAR;
                    (* TODO: unclear here *)
                  Hashtbl.replace var_addresses l (W, oldV, rel) 
               (* Hashtbl.replace var_addresses l (oldS, W, oldV, false) *)
                end else
                  Hashtbl.replace var_addresses l (W, v, false)
            | W | I -> 
              (* print_endline "in WAW"; *)
              (* write after write elimination should be correct when *)
              (* the latter write rewrites the same value and there is non r/a pair *)
              (* in between *)
              if compare_value v oldV
              then ah.redundant := WAW []; 
              Hashtbl.replace var_addresses l (W, v, false)
          else (
            (* print_endline "found nothing"; *)
            Hashtbl.replace var_addresses l (W, v, false) );
        ah :: (mark t irreads loaddeps false st)
      | Load (l,v,s)-> 
          let l = loc_bo_of_loc l in
        (* print_endline ("\nmarking load: "^(string_of_location st l)); *)
        (* first check for dependent IR *)
        (* warning: must apply -1 to get the right index during analysis *)
        let dll =
          List.concat (option_map (fun (x,y) -> if x = ah.split_index then Some y else None) loaddeps) in
        ( match dll with
          | [] -> ()
          | _ -> ah.redundant := IRR (List.map (fun x -> x-1) dll));
        (* the usual analysis *)
        if List.mem ah.split_index irreads 
        then ah.redundant := IRR [];
        if Hashtbl.mem var_addresses l then begin
          (* print_endline ("read found " ^ (string_of_location l st)); *)
          let (op,oldV,rel) = Hashtbl.find var_addresses l in 
          if op = R then
            ah.redundant := RAR
            (*no need to update the hash table entry *)
          else (* op was a W*)
            (ah.redundant := RAW;
               (*no need to compare values as our traces are sequential *)
               (* again, unclear here *)
             Hashtbl.replace var_addresses l (R, oldV, rel))
            (* Hashtbl.replace var_addresses l (R, oldV, false)) *)
        end else 
          Hashtbl.replace var_addresses l (R, v, false);
        (* for ARM, mark as UIL the u_ accesses *)
        if (string_of_loc_bo st l).[0] = 'u' 
        then ah.redundant := UIL;
        (* keep going *)
        ah :: (mark t irreads loaddeps false st)

      (* volatile accesses are not eliminable and have no effect on
	 other eliminations *)
      | VStore (l,v,s) -> 
        ah :: (mark t irreads loaddeps false st)

      | VLoad (l,v,s) -> 
        (* Antoniu suggests that it is sound to optimise away an unused volatile read *)
        if List.mem ah.split_index irreads 
        then ah.redundant := IRR [];
        ah :: mark t irreads loaddeps false st

      (* Clear any previous information at any flush or atomic
	 instruction except relaxed *)
      | AStore (a,l,v,s) -> 
        ( match a with
          | Relaxed -> ()
          | Release | Seq_cst -> set_release var_addresses 
	  | Acquire -> error "internal: AStore Acquire in mark" );
        ah :: (mark t irreads loaddeps (is_seq_cst a) st)

      | ALoad (a,l,v,s) -> 
        ( match a with
          | Relaxed -> ()
          | Acquire | Seq_cst -> set_acquire var_addresses 
	  | Release -> error "internal: ALoad Release in mark" );
        (* TODO: why false here, instead of 'is_seq_cst a' *)
        ah :: mark t irreads loaddeps false st

      | Lock _ -> 
        set_acquire var_addresses;
        ah :: mark t irreads loaddeps false st

      | Unlock _ -> 
        set_release var_addresses;
        ah :: mark t irreads loaddeps false st

      | Flush -> 
        if not aft_sc then ah.redundant := RSF;
        ah :: (mark t irreads loaddeps false st)
      )

(* scan the trace, when it finds a W followed by redundant reads and
another store to the same location (and possibly other actions at
different locations) mark the W as OW with the list of indexed of
redundant reads *)
(* PROBLEM: how to deal with irrelevant reads here is unclear *)
(* ASSUME SIZE IS ALWAYS 1 *)
let mark_ow_writes st (trace:annot_event list) : unit = 
  let var_ow = (Hashtbl.create 15 : (int*int, (rr ref * int list * bool)) Hashtbl.t) in

  (* let dump_var_ow () = *)
  (*   print_endline "--------"; *)
  (*   Hashtbl.iter  *)
  (*     (fun (lb,lo) (_,_,rel) -> *)
  (*       Printf.printf "%d,%d\n" lb lo *)
  (*     ) var_ow; *)
  (*   print_endline "--------" in *)

  (* to maintain the INIT count to correctly identify the indices as
     they are removed from array when passed to matchTraces *)
  let rec mark_ow (trace:annot_event list) index = 
    match trace with
      | [] -> ()
      | h :: t -> 
        (* Printf.printf "event: %s\n" (string_of_annot_event NoInit h st); *)
        match h.evt with
          | Init _ -> 
            mark_ow t index
          | Store (l,v,s) ->
            let l = loc_bo_of_loc l in
            if Hashtbl.mem var_ow l then begin
              let (ptr, redundant_indices, rel) = Hashtbl.find var_ow l in 
              if !ptr = NotRedundant || redundant_indices = [] then 
                ptr := OW redundant_indices;
              Hashtbl.replace var_ow l (h.redundant, [], rel)
            end else begin
              Hashtbl.replace var_ow l (h.redundant, [], false)
            end;
            mark_ow t (index+1)
          | Load (l,v,s) -> 
            let l = loc_bo_of_loc l in
            if Hashtbl.mem var_ow l then
              (match !(h.redundant) with
                | NotRedundant -> Hashtbl.remove var_ow l
                | _ -> let (rr, index_list, rel) = Hashtbl.find var_ow l in
                      Hashtbl.replace var_ow l (rr , index::index_list, rel));
            mark_ow t (index+1)
          (* flush is a noop for this dataflow *)  
          | Flush -> mark_ow t (index+1)
          (* Clear any previous information at any flush or atomic instruction *)
          | Lock _ -> 
            set_acquire_OW var_ow; mark_ow t (index+1)
          | Unlock _ -> 
            set_release_OW var_ow; mark_ow t (index+1)

          | VLoad (l,v,s) -> 
	    mark_ow t (index+1)
          | VStore (l,v,s) -> 
	    mark_ow t (index+1)

          | ALoad (a,l,v,s) -> 
            ( match a with
              | Relaxed -> ()
              | Acquire | Seq_cst -> set_acquire_OW var_ow
	      | Release -> error "internal: ALoad Release in mark_ow" );
            mark_ow t (index+1)
          | AStore (a,l,v,s) -> 
            ( match a with
              | Relaxed -> ()
              | Release | Seq_cst -> set_release_OW var_ow 
	      | Acquire -> error "internal: AStore Acquire in mark_ow" );
            mark_ow t (index+1)
          (* | AStore _ | ALoad _ | Lock _ | Unlock _ -> *)
          (*   Hashtbl.clear var_ow; *)
          (*   mark_ow t (index+1) *)
  in
  mark_ow trace 0

let mark_WAW_chains_writes st (trace:annot_event list) : unit =
  let debug = false in 
  let waw_chains = (Hashtbl.create 15 (* : ((int * int) (value * int list)) Hashtbl.t *) ) in
  let dump_waw_chains () =
    print_endline "*** WAW chains";
    let hdump = ref [] in 
    Hashtbl.iter (fun l (v,deps) -> hdump := (l,v,deps)::!hdump) waw_chains;
    let hdumpsorted = List.sort (fun ((_,o1),_,_) ((_,o2),_,_) -> o1-o2) !hdump in
    List.iter (fun ((l,o),v,deps) -> 
      let ls = (string_of_location st (l,o,[])) in
      if ls.[0] = 'c' || ls.[2] = 'm' then () else
        Printf.printf "loc: %s -- value: %s -- deps: %s\n" ls (string_of_value v st) (String.concat "," (List.map string_of_int deps)))
      hdumpsorted;
  in
  let rec aux trace idx =
    match trace with
      | [] -> if debug then dump_waw_chains (); ()
      | ah :: t ->
        ( if debug then print_endline "***********";
          match ah.evt with
          | Init (l,v,s) ->
            let l = loc_bo_of_loc l in
            Hashtbl.replace waw_chains l (v,[]);
            aux t 0
          | Store (l,v,s) ->
            let l = loc_bo_of_loc l in
            if debug then dump_waw_chains ();
            if debug then print_endline "--- event";
            if debug then print_endline (string_of_event st ah.evt);
            if Hashtbl.mem waw_chains l then
              let (ov,deps) = Hashtbl.find waw_chains l in
              if !(ah.redundant) = NotRedundant
              then begin
                if v = ov then begin
                  if debug then print_endline "WAW detected";
                  ah.redundant := WAW deps;
                  Hashtbl.replace waw_chains l (v,[]); 
		  (* WHAT DEPS HERE IS CRITICAL FOR THE HEURISTICS *)
                  aux t (idx+1)
                end else begin
                  Hashtbl.replace waw_chains l (v,[]);
                  aux t (idx+1)
                end end else begin
                  if debug then print_endline "already redundant";
                  Hashtbl.replace waw_chains l (ov,idx::deps);
                  aux t (idx+1)
                end
            else begin
                  Hashtbl.replace waw_chains l (v,[]);
                  aux t (idx+1)
            end
              
          | Load (l,v,s) ->
            let l = loc_bo_of_loc l in
            if debug then dump_waw_chains ();
            if debug then print_endline "--- event";
            if debug then print_endline (string_of_event st ah.evt);
            if !(ah.redundant) = NotRedundant
            then begin
              Hashtbl.remove waw_chains l; 
              aux t (idx+1)
            end else begin
              let (ov,deps) = 
                try Hashtbl.find waw_chains l 
                with Not_found -> (v,[]) in
              Hashtbl.replace waw_chains l (ov,idx::deps);
              aux t (idx+1)
            end

	  | VLoad _ | VStore _ -> aux t (idx+1)

          | ALoad _ | AStore _ | Flush | Lock _ | Unlock _ -> 
            (* heuristic: locks and unlocks clear the waw chains *)
            Hashtbl.reset waw_chains;
            aux t (idx+1) (* FIXME *)
        )
  in aux trace 0

let rec skip_init trace =
  match trace with
    | h::t when is_init h.evt -> skip_init t
    | _ -> trace

let merge_annot buf trace =
  let map_deps d = 
    ((List.nth (skip_init trace) d).split_index)-1 in
  let no_deps_write a =
    match a with
      | OW d | WAW d when not(d=[]) -> false
      | _ -> true in
  let merge_two a1 a2 =
    match a1,a2 with
      | NotRedundant, _ -> NotRedundant
      | _, NotRedundant -> NotRedundant
      | IRR _, _       -> a1 (* TODO, fix the dependencies *)
      | _, IRR _       -> a2 (* TODO, fix the dependencies *)
      | OW d1, OW d2   -> OW (d1@d2)
      | OW d, _        -> if no_deps_write a2 then a1 else CE
      | _, OW d        -> if no_deps_write a1 then a2 else CE
      | WAW d1, WAW d2 -> WAW (d1@d2)
      | WAW d, _       -> if no_deps_write a2 then a1 else CE
      | _, WAW d       -> if no_deps_write a1 then a2 else CE
      | RAW, RAR | RAR, RAW  -> RAW  (* observed *)
      | _, _           -> if a1 = a2 then a1 else CE in
  if buf = [] then ()
  else
    let mr = List.fold_left merge_two (!(List.hd buf)) (List.map (!) (List.tl buf)) in
    let mrm = match mr with
      | OW d -> OW (Util.remove_duplicates (List.map map_deps d))
      | WAW d -> WAW (Util.remove_duplicates (List.map map_deps d))
      | _ -> mr  in 
    List.iter (fun x -> x := mrm) buf

let mark_merge trace st =
  let rec aux t ctr buf =
    match t with 
      | [] -> merge_annot buf trace
      | ah::tl when is_init ah.evt -> aux tl ctr []
      | ah::tl -> 
        if ah.split_index = ctr 
        then aux tl ctr (ah.redundant::buf)
        else begin
          merge_annot buf trace;
          aux tl ah.split_index [ah.redundant]
        end
  in aux trace 1 []


let mark_idempotent_writes trace st : annot_event list =
  (* clears all other annotations *)
  let marked_trace = mark trace [] [] false st in
  List.iter
    (fun ev ->
      if !(ev.redundant) = WAR || !(ev.redundant) = WAW []
      then () 
      else ev.redundant := NotRedundant)
    marked_trace;
  marked_trace

let  mark_ir trace irreads loaddeps : annot_event list =
  let rec aux trace last =
    match trace with
      | [] -> []
      | ah :: t ->
        ( match ah.evt with
          | Init (l,v,s) -> 
            ah :: (aux t None)
          | Load _ | ALoad _ -> 
            (* first dependent IR *)
            let dll = 
              List.concat (option_map (fun (x,y) -> if x = ah.split_index then Some y else None) loaddeps) in
            ( match dll with
              | [] -> ()
              | _ -> ah.redundant := IRR (List.map (fun x -> x-1) dll));
            (* and then real IR *)
            if List.mem ah.split_index irreads 
            then ah.redundant := IRR [];
            ( match ah.evt, last with
              | Load (l,v,s), Some (Load (l1,v1,s1)) when l = l1 && s = s1 && v = v1 -> 
                ah.redundant := RAR;
              | _,_ -> ());
            ah :: (aux t (Some ah.evt))
          | _ -> 
            ah :: (aux t None) )
  in aux trace None

(* *** trace comparison algorithm *** *)

type per_byte_rr = 
  | PB_IRR of (int list * int list)
  | PB_RAW 
  | PB_RAR 
  | PB_OW of (int list * int list) 
  | PB_WAR 
  | PB_WAW of (int list * int list)
  | PB_RSF (* redundant store flush, for release and relaxed store *) 
  | PB_UIL (* reads from the .text section, for ARM only *)
  | PB_SZ
  | PB_NotRedundant 

let string_of_per_byte_rr = function
  | PB_NotRedundant -> "     "
  | PB_IRR ([],[]) ->"*IR * "
  | PB_IRR (l,g) -> "*IR {" ^ (String.concat "," (List.map string_of_int l))^" ; " 
    ^ (String.concat "," (List.map string_of_int g)) ^ "}* "
  | PB_RAW ->   "*RaW* "
  | PB_RAR ->   "*RaR* "
  | PB_OW ([],[]) -> "*OW * "
  | PB_OW (l,g) -> 
    "*OW {" ^ (String.concat "," (List.map string_of_int l))^" ; " 
    ^ (String.concat "," (List.map string_of_int g)) ^ "}* "
  | PB_WAR ->  "*WaR* "
  | PB_WAW ([],[]) ->  "*WaW* "
  | PB_WAW (l,g) ->  "*WaW {" ^ (String.concat "," (List.map string_of_int l))^" ; " 
    ^ (String.concat "," (List.map string_of_int g)) ^ "}* "
  | PB_RSF ->  "*RsF* " (*redundant store fence*)
  | PB_SZ  -> "*SZ* "
  | PB_UIL ->  "*UIL* "


type per_byte_event = {
  pb_evt : event;
  pb_rr : per_byte_rr;
  pb_split_index : int;  (* trace event the split event belongs to *)
  pb_global_index : int (* pointer to the event in the global per-byte trace *)
}

type per_byte_trace = loc * (per_byte_event list)

type per_byte_traces = per_byte_trace list 

let dump_per_byte_trace st (t:per_byte_event list) = 
  List.iter 
    (fun ee -> 
      Printf.printf " %s [%2d] [%3d] %s\n"
(*          (if is_redundant !(e.redundant) then "*" else " ") *)
        (string_of_event st ee.pb_evt)
        ee.pb_split_index 
        ee.pb_global_index
        (string_of_per_byte_rr ee.pb_rr))
    t 

let dump_per_byte_pair_traces st t1 t2 = 
  print_endline "********";
  dump_per_byte_trace st t1;
  print_endline "--------";
  dump_per_byte_trace st t2;
  print_endline "********"

let dump_per_byte_traces st (t:per_byte_traces) = 
  List.iter (fun (l,t) -> 
    Printf.printf "** AT: %s\n" (string_of_location st l);
    dump_per_byte_trace st t) 
    t 

(* HERE *)

let explode_trace st (locs:loc list) (trace:annot_event list) : per_byte_traces =
  let trace_no_init = skip_init trace in

  let find_pb_trace pb_traces (b,o,a) =
    let rec aux ts buf = 
      match ts with
        | [] -> None
        | ((b',o',a'),t)::tsl -> 
          if b=b' && o=o' && a=a' then Some (t, buf@tsl) 
          else aux tsl (((b',o',a'),t)::buf)
    in aux pb_traces [] 
  in
  
  let map_rr l rr =
    let map_deps deps =
      let rec map_deps_aux deps loc glob =
        match l with
          | Some l -> (
            match deps with
              | [] -> (loc,glob)
              | d::dt -> 
                let d = d+1 in
            (* find all the events that have split_index d *)
                let events_at_d = List.filter (fun e -> e.split_index = d) trace in
                try 
                  let _ = List.find (fun e -> 
                    match loc_of_ev e.evt with
                      | Some l' -> compare_locs l' l = 0
                      | None -> false) events_at_d in
                  let ld = 
                    let rec find_local_dep trace ctr =
                      match trace with
                        | [] -> error "internal: map_deps, can't map dep to local"
                        | e::et -> 
                      (* Printf.printf "  l: %s [d:%d] ;;; e: %s ; [si: %d]\n"  *)
                      (*   (string_of_location st l) d (string_of_location st (loc_of_ev e.evt)) e.split_index; *)
                          if compare_locs (Util.the (loc_of_ev e.evt)) l = 0 && e.split_index = d then ctr
                          else if compare_locs (Util.the (loc_of_ev e.evt)) l = 0 || is_atomic e.evt  then find_local_dep et (ctr+1) 
                          else find_local_dep et ctr in
                    find_local_dep trace_no_init 0 in
                  map_deps_aux dt (ld::loc) glob
                with Not_found -> map_deps_aux dt loc (d::glob))
          | None -> error "internal: map_deps_aux with l = None" in
      map_deps_aux deps [] [] in
    
    match rr with
      | IRR deps -> PB_IRR (map_deps deps)
      | RAW      -> PB_RAW
      | RAR      -> PB_RAR
      | OW deps  -> PB_OW (map_deps deps)
      | WAR      -> PB_WAR
      | WAW deps -> PB_WAW (map_deps deps)
      | RSF      -> PB_RSF
      | UIL      -> PB_UIL
      | CE       -> error "internal: CE in map_rr"
      | SZ       -> PB_SZ
      | NotRedundant -> PB_NotRedundant in

  let rec distribute trace i pb_traces =
    match trace with 
      | [] -> 
        List.map (fun (l,t) -> (l,List.rev t)) pb_traces
      | ae :: tl ->
        let ee =
          { pb_evt = ae.evt;
            pb_split_index = ae.split_index;
            pb_global_index = i;
            pb_rr = map_rr (loc_of_ev ae.evt) !(ae.redundant)
          } in
        if is_atomic ae.evt 
        then
          distribute tl (i+1) (List.map (fun (loc,t) -> (loc,ee::t)) pb_traces)
        else
          let loc = Util.the (loc_of_ev ae.evt) in
          match find_pb_trace pb_traces loc with
            | None -> distribute tl (i+1) ((loc, [ee])::pb_traces)
            | Some (t,o_pb_traces) -> distribute tl (i+1) ((loc, ee::t)::o_pb_traces)
  in
  let init_pb_traces = List.map (fun l -> (l,[])) locs in
  let unsorted_output = distribute trace_no_init 1 init_pb_traces in
  List.sort (fun (l1,t1) (l2,t2) -> compare_locs l1 l2) unsorted_output

let same_op_loc_val =
  fun x y ->
    match x, y with
    | Flush, Flush -> true
    | Init (l1,v1,_), Init (l2,v2,_) -> v1 = v2
    | Load (l1,v1,s1), Load (l2,v2,s2) 
    | Store (l1,v1,s1), Store (l2,v2,s2) -> v1 = v2
    | VLoad (l1,v1,s1), VLoad (l2,v2,s2) 
    | VStore (l1,v1,s1), VStore (l2,v2,s2) -> v1 = v2
    | ALoad (a1, l1, v1, _), ALoad (a2, l2, v2, _) -> same_loc l1 l2 && v1 = v2 && a1 = a2
    | AStore (a1, l1, v1, _), AStore (a2, l2, v2, _) -> same_loc l1 l2 && v1 = v2 && a1 = a2
    | Lock l1, Lock l2 -> same_loc l1 l2
    | Unlock l1, Unlock l2 -> same_loc l1 l2
    | _, _ -> false

(* the naive recursive version of the trace matching algorithm *)
let reorder_elim_check opt_analyse_write_only (tx: per_byte_event list) (ty: per_byte_event list) =
  let x = Array.of_list tx in
  let y = Array.of_list ty in

  let lenX, lenY = Array.length x, Array.length y in

  let rec try_elim_in_x deleted_x index_x deleted_y index_y to_delete =
    (* Printf.printf "elim_x index: %d %d\n" index_x index_y; *)

    let delete_x_and_re_check index_x to_delete =
      let old_deleted = deleted_x.(index_x) in
      deleted_x.(index_x) <- true;
      let temp = re_check deleted_x (index_x + 1) deleted_y index_y to_delete in
      deleted_x.(index_x) <- old_deleted;
      temp in

    match x.(index_x).pb_rr with 
      | PB_NotRedundant -> false 

      | PB_OW (ld,gd) ->
        (* FIXME *)
        (* Printf.printf "\n OW at_x: %d\n" index_x; *)
        List.iter (fun i ->         
(*          Printf.printf " - %d " i; *)
          deleted_x.(i) <- true) ld;
        let old_deleted = deleted_x.(index_x) in
        deleted_x.(index_x) <- true;
        let temp = re_check deleted_x (index_x + 1) deleted_y index_y (gd@to_delete) in
        deleted_x.(index_x) <- old_deleted;
        List.iter (fun i -> deleted_x.(i) <- false) ld;
        temp 

      | PB_WAW (ld,gd) ->
(*        Printf.printf "\n WAW at_x: %d\n" index_x; *)
        if List.for_all (fun i -> deleted_x.(i)) ld
        then delete_x_and_re_check index_x (gd@to_delete)
        else false

      | PB_IRR (ld,gd) ->
        if List.for_all (fun i ->
          match x.(i).pb_rr with
            | PB_NotRedundant -> false
            (* | PB_OW (_::_,_) | PB_OW (_,_::_) -> false FIXME, CHECK, WHY? *)
            | _ -> true) ld 
        then begin
          (* FIXME *)
          (* print_endline "IRR check ok"; *)
          List.iter (fun i -> deleted_x.(i) <- true) ld;
          let old_deleted = deleted_x.(index_x) in
          deleted_x.(index_x) <- true;
          let temp = re_check deleted_x (index_x + 1) deleted_y index_y (gd@to_delete) in
          deleted_x.(index_x) <- old_deleted;
          List.iter (fun i -> deleted_x.(i) <- false) ld;
          temp 
        end else begin       (*   print_endline "IRR check off"; *)
          false end

      | PB_RAW | PB_RAR | PB_WAR | PB_RSF | PB_SZ ->
        delete_x_and_re_check index_x to_delete

      | _ -> error "dependendencies unimplemented x"

  and try_elim_ir_in_y deleted_x index_x deleted_y index_y to_delete =
    
    let delete_y_and_re_check index_y to_delete =
      let old_deleted = deleted_y.(index_y) in
      deleted_y.(index_y) <- true;
      let temp = re_check deleted_x index_x deleted_y (index_y + 1) to_delete in
      deleted_y.(index_y) <- old_deleted;
      temp in

    match y.(index_y).pb_rr with
      | PB_IRR ([],[]) | PB_RAR | PB_WAR | PB_WAW ([],[]) | PB_OW ([],[]) -> delete_y_and_re_check index_y to_delete
      | PB_IRR (ld,gd) ->
        if List.for_all (fun i ->
          match y.(i).pb_rr with
            | PB_NotRedundant -> false
            | PB_OW (_::_,_) | PB_OW (_,_::_) -> false
            | _ -> true) ld 
        then begin
          (* FIXME *)
          (* print_endline "IRR check ok"; *)
          List.iter (fun i -> deleted_y.(i) <- true) ld;
          let old_deleted = deleted_y.(index_y) in
          deleted_y.(index_y) <- true;
          let temp = re_check deleted_x index_x deleted_y (index_y+1) (gd@to_delete) in
          deleted_y.(index_y) <- old_deleted;
          List.iter (fun i -> deleted_y.(i) <- false) ld;
          temp 
        end else begin       (*   print_endline "IRR check off"; *)
          false end

      | PB_NotRedundant -> false
      | PB_OW d -> false
      | PB_SZ -> delete_y_and_re_check index_y to_delete
      | _ -> error ("internal: an element of the optimised trace has been marked "
                    ^ "eliminable with something else than an SZ, IR, OW, WAR, WAW or RAR.")

  and try_elim_flush_in_y deleted_x index_x deleted_y index_y to_delete =
    match y.(index_y).pb_evt with
      | Flush -> re_check deleted_x index_x deleted_y (index_y + 1) to_delete
      | _ -> false 

  and re_check deleted_x index_x deleted_y index_y to_delete =
    (* Printf.printf "re_check index: %d:%d %d:%d\n" lenX index_x lenY index_y; *)

    if lenX > index_x && deleted_x.(index_x)
    then re_check deleted_x (index_x + 1) deleted_y index_y to_delete
    else if lenY > index_y && deleted_y.(index_y)
    then re_check deleted_x index_x deleted_y (index_y + 1) to_delete

    (* analyse_write_only option *)
    (* else if opt_analyse_write_only && lenX > index_x && is_na_read (x.(index_x).pb_evt) *)
    (* then re_check deleted_x (index_x + 1) deleted_y index_y to_delete *)
    (* else  if opt_analyse_write_only && lenY > index_y && is_na_read (y.(index_y).pb_evt) *)
    (* then re_check deleted_x index_x deleted_y (index_y + 1) to_delete *)

    (* main matching *)
    else
      match (lenX > index_x), (lenY > index_y) with
        | true, true ->
          (* print_endline "true,true"; *)
          ( same_op_loc_val x.(index_x).pb_evt y.(index_y).pb_evt 
            && re_check deleted_x (index_x+1) deleted_y (index_y+1) to_delete )
            
          || try_elim_in_x deleted_x index_x deleted_y index_y to_delete
          || try_elim_ir_in_y deleted_x index_x deleted_y index_y to_delete
          || try_elim_flush_in_y deleted_x index_x deleted_y index_y to_delete
              
        (* unmatched x remaining but y is over *)
        | true, false -> 
          try_elim_in_x deleted_x index_x deleted_y index_y to_delete
            
        (* unmatched y remaining but x is over *)
        | false, true -> 
          try_elim_ir_in_y deleted_x index_x deleted_y index_y to_delete
          || try_elim_flush_in_y deleted_x index_x deleted_y index_y to_delete

        | false, false ->  
          true
  in
  re_check (Array.make lenX false) 0 (Array.make lenY false) 0 []

let rec remove_init_annot t =
  match t with
  | [] -> []
  | hd::tl -> match hd.evt with
    | Init _ -> remove_init_annot tl
    | _ -> t

let rec remove_non_volatile t =
  match t with
  | [] -> []
  | hd::tl -> match hd with
    | VLoad _ | VStore _ -> hd :: (remove_non_volatile tl)
    | _ -> remove_non_volatile tl

(* project an unannotated trace on byte accesses *)

let project_byte st trace =
  let split (lb,lo,la) v s =
    let rec split_aux i evs =
      if i = s then evs else begin
        (* loc *)
        let nlo = lo + i in
        (* val *)
        match v with
          | NonPointer v ->
            let nv = Z.logand (Z.shift_right v (8*i)) (Z.of_int 0xFF) in
            split_aux (i+1) (((lb,nlo,la),NonPointer nv)::evs)
          | Pointer l -> split_aux (i+1) (((lb,nlo,la),Pointer l)::evs)
      end
    in split_aux 0 [] in
  let split_event ev index =
    match ev with
      | Init (l,v,s) -> 
        List.map (fun (ls,vs) -> (Init (ls,vs,1), index)) (split l v s)
      | Load (l,v,s) -> 
        List.map (fun (ls,vs) -> (Load (ls,vs,1), index)) (split l v s)
      | Store (l,v,s) -> 
        List.map (fun (ls,vs) -> (Store (ls,vs,1), index)) (split l v s)
      | _ -> [ev, index]
  in
  let rec project_aux trace proj index = 
    match trace with
      | ev :: tl -> 
        project_aux tl 
          ((split_event ev index)::proj) 
          (match ev with Init _ -> 1 | _ -> index+1)
      | [] -> 
        List.map 
          (fun (ev,i) -> 
            { evt = ev;
              redundant = ref NotRedundant;
              split_index = i })
          (List.rev (List.flatten proj))
  in project_aux trace [] 1

(* project an annotated trace on byte accesses *)

let annot_project_byte dwarf st (trace:annot_event list) =
  let split (lb,lo,la) v s =
    let rec split_aux i evs =
      if i = s then evs else begin
        (* loc *)
        let nlo = lo + i in
        (* TODO: remap the attributes *)
        let nla,rnlo,_ = (* la in *)
          let var_name = string_of_loc_base st lb in
          let type_tag = Dwarfparse.compute_type_tag dwarf (the (Dwarfparse.find_var_type_node dwarf var_name)) in
          Dwarfparse.resolve_address type_tag 0 nlo in
        (* val *)
        match v with
          | NonPointer v ->
            let nv = Z.logand (Z.shift_right v (8*i)) (Z.of_int 0xFF) in
            split_aux (i+1) (((lb,rnlo,nla),NonPointer nv)::evs)
          | Pointer l -> split_aux (i+1) (((lb,rnlo,la),Pointer l)::evs)
      end
    in split_aux 0 [] in
  let split_annot_event (ev:annot_event) index =
    match ev.evt with
      | Init (l,v,s) ->
        List.map (fun (ls,vs) -> (Init (ls,vs,1), !(ev.redundant), index)) (split l v s)
      | Load (l,v,s) ->
        let evs = List.rev (split l v s) in
        let ev1 = (fun (ls,vs) -> Load (ls,vs,1), !(ev.redundant), index) (List.hd evs) in
        let evt = List.map (fun (ls,vs) -> 
          (Load (ls,vs,1), 
           (if is_redundant_if_deps !(ev.redundant) || not (is_redundant !(ev.redundant)) 
           then SZ else !(ev.redundant)),
           index)) (List.tl evs) in
       List.rev (ev1::evt)
      | Store (l,v,s) ->
        let evs = List.rev (split l v s) in
        let ev1 = (fun (ls,vs) -> Store (ls,vs,1), !(ev.redundant), index) (List.hd evs) in
        let evt = List.map (fun (ls,vs) -> 
          (Store (ls,vs,1), 
           (if is_redundant_if_deps !(ev.redundant) || not (is_redundant !(ev.redundant)) 
           then SZ else !(ev.redundant)),
           index)) (List.tl evs) in
       List.rev (ev1::evt)

      | _ -> [ev.evt, !(ev.redundant), index]
  in
  let rec project_aux (trace:annot_event list) proj index =
    match trace with
      | ev :: tl ->
        project_aux tl
          ((split_annot_event ev index)::proj)
          (match ev.evt with Init _ -> 1 | _ -> index+1)
      | [] ->
        List.map
          (fun (ev,r,i) ->
            { evt = ev;
              redundant = ref r;
              split_index = i })
          (List.rev (List.flatten proj))
  in project_aux trace [] 1

(* copy the annotations to the original, un-splitted trace *)

(* TODO: fix complexity *)

let copy_annot trace byte_trace =
  let find_annot idx =
    let ev = List.find (fun e -> e.split_index = idx && not (is_init e.evt)) byte_trace in
    !(ev.redundant) in
  let rec aux trace idx acc = 
    match trace with
      | [] -> List.rev acc
      | h::t when is_init h -> 
        let nh = { evt = h; redundant = ref NotRedundant; split_index = 0 } in
        aux t idx (nh::acc)
      | h::t -> 
        let nh = { evt = h; redundant = ref (find_annot idx); split_index = 0 } in
        aux t (idx+1) (nh::acc)
  in aux trace 1 []

(* project an annotated trace on the different global variables *)

let collect_variables trace st =
  print_endline "collect variables";
  let rec aux trace vars =
    match trace with
      | ev :: tl -> 
        ( match ev.evt with
          | Init (l,_,_) -> 
            if String.length (string_of_location st l) > 2 then
              if (string_of_location st l).[1] = 'r' then aux tl vars else aux tl (l::vars)
            else aux tl vars
          | _ -> vars )
      | _ -> vars in
  aux trace []
 
let project_var st trace var =
  let rec aux trace proj = 
    match trace with 
      | [] -> List.rev proj
      | ev::tl -> 
        if is_atomic ev.evt then aux tl (ev::proj)
        else let (lb,lo,la) = Util.the (loc_of_ev ev.evt) in
             if (lb, 0, []) = var && not (is_init ev.evt)  (* CHECK *)
             then aux tl (ev::proj)
             else aux tl proj in
  aux trace []

let project trace_unopt trace_opt st =
  let vars = collect_variables trace_unopt st in
  Printf.printf "\nmatching: %s\n" (String.concat " " (List.map (string_of_location st) vars));
  List.map (fun var -> 
    (var, project_var st trace_unopt var, project_var st trace_opt var))
    vars

(* match the spine *)

let same_atomic e1 e2 =
  match e1, e2 with
    | ALoad (a1, l1, v1, s1), ALoad (a2, l2, v2, s2)
    | AStore (a1, l1, v1, s1), AStore (a2, l2, v2, s2) -> 
      l1 = l2 && v1 = v2 && a1 = a2 && s1 = s2
    | Lock l1, Lock l2 | Unlock l1, Unlock l2 -> 
      l1 = l2
    | _,_ -> false

let rec match_spine trace_unopt trace_opt =
  match trace_unopt, trace_opt with
    | ue::ut, oe::ot -> 
      if not (is_atomic ue) then match_spine ut trace_opt
      else if not (is_atomic oe) then match_spine trace_unopt ot
      else if same_atomic ue oe then match_spine ut ot
      else false
    | ue::ut, [] -> 
      if not (is_atomic ue) then match_spine ut [] else false
    | [], oe::ot -> 
      if not (is_atomic oe) then match_spine [] ot else false
    | [], [] -> true

exception Success
exception SpineError
exception MarkError
exception MatchError (* of loc *)

let trace_copy trace =
  let rec aux trace buf =
    match trace with
      | [] -> List.rev buf
      | h::t ->
        let nev = { evt = h; redundant = ref NotRedundant; split_index = 0 } in
        aux t (nev::buf) in
  aux trace []

let rec compare_marking t1 t2 =
  match t1,t2 with
    | h1::tl1, h2::tl2 -> 
      let m =
        ( match !(h1.redundant), !(h2.redundant) with
          | OW d1, OW d2 -> (List.sort compare d1) = (List.sort compare d2)
          | a1, a2 -> a1 = a2) in
      if m 
      then compare_marking tl1 tl2
      else false
    | [], [] -> true
    | _,_ -> false

let both_empty t1 t2 =
  let is_empty t1 =
    let t = skip_init t1 in 
    if List.length t = 0 then true else false in
  is_empty t1 && is_empty t2

let match_per_byte st (pbt1:per_byte_traces) (pbt2:per_byte_traces) : bool =

  let compare_byte_traces l t1 t2 : bool (* fix the return type *)=
    if !opt_debug then begin
      print_endline ("comparing: "^(string_of_location st l));
      dump_per_byte_pair_traces st t1 t2
    end;
    reorder_elim_check false t1 t2 in

  let rec aux_match pbt1 pbt2 =
    match pbt1,pbt2 with
      | (l1,t1)::pbt1t, (l2,t2)::pbt2t ->
        ( match compare_locs l1 l2 with
          | 0  -> compare_byte_traces l1 t1 t2 && aux_match pbt1t pbt2t
          | -1 -> compare_byte_traces l1 t1 [] && aux_match pbt1t pbt2
          | 1  -> compare_byte_traces l2 [] t2 && aux_match pbt1  pbt2t
          | _  -> error "internal: compare_locs" )
      | (l1,t1)::pbt1t, [] -> compare_byte_traces l1 t1 [] && aux_match pbt1t []
      | [], (l2,t2)::pbt2t -> compare_byte_traces l2 [] t2 && aux_match [] pbt2t
      | [], [] -> true in
        
  (* print_endline "pbt1"; *)
  (* dump_per_byte_traces pbt1; *)
  (* print_endline "\n\npbt2"; *)
  (* dump_per_byte_traces pbt2; *)
  (* print_endline "\n\nmatching"; *)
  aux_match pbt1 pbt2

let new_match 
    trace_unopt ir_unopt ld_unopt co_unopt trace_opt ir_opt ld_opt co_opt st dwarf dump : bool =
  (* marking the unopt trace *)
  let byte_trace_unopt = project_byte st trace_unopt in
  let annot_byte_trace_unopt = mark byte_trace_unopt ir_unopt ld_unopt false st in
  mark_ow_writes st annot_byte_trace_unopt;
  mark_WAW_chains_writes st annot_byte_trace_unopt;
  (* print_endline "\n*** BEFORE MERGE CHECK ***\n"; *)
  (* dump_annot_trace All (annot_byte_trace_unopt,st); *)
  mark_merge annot_byte_trace_unopt st;
  (* print_endline "\n*** AFTER MERGE CHECK ***\n"; *)
  (* dump_annot_trace NoInit (annot_byte_trace_unopt,st); *)
  let annot_trace_unopt = copy_annot trace_unopt annot_byte_trace_unopt in

  (* marking the opt trace *)
  let byte_trace_opt = project_byte st trace_opt in
  let annot_byte_trace_opt = mark_idempotent_writes byte_trace_opt st in
  let annot_byte_trace_opt = mark_ir annot_byte_trace_opt ir_opt ld_opt in
  mark_ow_writes st annot_byte_trace_opt;
  mark_merge annot_byte_trace_opt st;
  let annot_trace_opt = copy_annot trace_opt annot_byte_trace_opt in
  (* dump the traces *)
  dump_annot_traces dump 
    (annot_trace_unopt) co_unopt 
    (copy_annot trace_opt annot_byte_trace_opt) co_opt st;

  (* and now attempt to match *)
  let byte_annot_trace_unopt = annot_project_byte dwarf st annot_trace_unopt in 
  let byte_annot_trace_opt = annot_project_byte dwarf st annot_trace_opt in 

  (* dump the byte-traces *)
  (* dump_annot_traces dump *)
  (*   (byte_annot_trace_unopt) co_unopt *)
  (*   (byte_annot_trace_opt) co_opt st; *)

  (* from here it is highly experimental *)
  let no_init_byte_annot_trace_unopt = remove_init_annot byte_annot_trace_unopt in
  let no_init_byte_annot_trace_opt = remove_init_annot byte_annot_trace_opt in

  let locs =
    let unopt_locs = Util.remove_duplicates 
      (Util.option_map (fun e -> loc_of_ev e.evt) no_init_byte_annot_trace_unopt) in
    let opt_locs = Util.remove_duplicates
      (Util.option_map (fun e -> loc_of_ev e.evt) no_init_byte_annot_trace_opt) in
    Util.remove_duplicates (unopt_locs @ opt_locs) in
    
  let per_bytes_unopt = explode_trace st locs no_init_byte_annot_trace_unopt in
  let per_bytes_opt = explode_trace st locs no_init_byte_annot_trace_opt in

  if false (* !opt_debug *) then begin
    print_endline "***\nUnoptimised exploded traces\n***\n";
    dump_per_byte_traces st per_bytes_unopt;
    print_endline "***\nOptimised exploded traces\n***\n";
    dump_per_byte_traces st per_bytes_opt
  end;

  match_per_byte st per_bytes_unopt per_bytes_opt

let same_volatile e1 e2 =
  match e1, e2 with
    | VLoad (l1, v1, s1), VLoad (l2, v2, s2)
    | VStore (l1, v1, s1), VStore (l2, v2, s2) -> 
      l1 = l2 && v1 = v2 && s1 = s2
    | _,_ -> false

let match_volatile trace_unopt co_unopt trace_opt co_opt st dwarf dump =

  dump_traces dump trace_unopt co_unopt trace_opt co_opt st;

  let trace_unopt_vo = 
    remove_non_volatile trace_unopt in
  let trace_opt_vo = 
    remove_non_volatile trace_opt in


  let rec aux tr_unopt tr_opt =
    match tr_unopt, tr_opt with
      | ue::ut, oe::ot -> 
        if same_volatile ue oe then aux ut ot else false
      | e::t, [] | [], e::t -> false
      | [], [] -> true in

  (* if List.length trace_unopt_vo != List.length trace_opt_vo *)
  (* then true *)
  (* else *) aux trace_unopt_vo trace_opt_vo

  (*   else no_init_byte_annot_trace_unopt in *)
  (* let no_init_byte_annot_trace_opt = *)
  (*   if opt_volatile_only then remove_non_volatile no_init_byte_annot_trace_opt *)
  (*   else no_init_byte_annot_trace_opt in *)


let match_volatile_cached trace_unopt co_unopt trace_opt co_opt st dwarf dump =
  let one_to_one = 
    match_volatile trace_unopt.events co_unopt trace_opt.events co_opt st dwarf dump in
  if one_to_one then false else begin
    true
  end  (* TODO *)



(* count the number of atomic/non_atomic accesses *)
      
let stats trace =
  let rec count t nal nas nvl nvs nl ns nf nlock nunlock =
    match t with
      | [] -> 
        { no_aload = nal; no_astore = nas; no_vload = nvl; no_vstore = nvs; 
	  no_load = nl; no_store = ns;
          no_flush = nf; no_lock = nlock; no_unlock = nunlock },
          nvl+nvs+nal+nas+nl+ns+nf+nlock+nunlock
      | h::t ->
        ( match h with
          | Load _ -> count t nal nas nvl nvs (nl+1) ns nf nlock nunlock
          | Store _ -> count t nal nas nvl nvs nl (ns+1) nf nlock nunlock
          | VLoad _ -> count t nal nas (nvl+1) nvs nl ns nf nlock nunlock
          | VStore _ -> count t nal nas nvl (nvs+1) nl ns nf nlock nunlock
          | ALoad _ -> count t (nal+1) nas nvl nvs nl ns nf nlock nunlock
          | AStore _ -> count t nal (nas+1) nvl nvs nl ns nf nlock nunlock
          | Init _ -> count t nal nas nvl nvs nl ns nf nlock nunlock
          | Flush -> count t nal nas nvl nvs nl ns (nf+1) nlock nunlock
          | Lock _ -> count t nal nas nvl nvs nl ns nf (nlock+1) nunlock
          | Unlock _ -> count t nal nas nvl nvs nl ns nf nlock (nunlock+1))
  in count trace 0 0 0 0 0 0 0 0 0 
  
let simple_stats logfile trace =
  let name = Filename.basename (Filename.chop_extension logfile) in
  let rec count t nal nas nvl nvs nl ns nf nlock nunlock=
    match t with
      | [] -> 
        Printf.printf 
          "%s: \t %d \t %d \t %d \t %d \t %d \t %d \t %d \t %d \t %d\n" 
          (align name 36) nal nas nvl nvs nl ns nf nlock nunlock; 
        flush stdout;
        nal+nas+nvl+nvs+nl+ns+nf+nlock+nunlock
      | h::t ->
        ( match h with
          | Load _   -> count t nal nas nvl nvs (nl+1) ns nf nlock nunlock
          | Store _  -> count t nal nas nvl nvs nl (ns+1) nf nlock nunlock
          | VLoad _  -> count t nal nas (nvl+1) nvs nl ns nf nlock nunlock
          | VStore _ -> count t nal nas nvl (nvs+1) nl ns nf nlock nunlock
          | ALoad _  -> count t (nal+1) nas nvl nvs nl ns nf nlock nunlock
          | AStore _ -> count t nal (nas+1) nvl nvs nl ns nf nlock nunlock
          | Init _   -> count t nal nas nvl nvs nl ns nf nlock nunlock
          | Flush    -> count t nal nas nvl nvs nl ns (nf+1) nlock nunlock
          | Lock _   -> count t nal nas nvl nvs nl ns nf (nlock+1) nunlock
          | Unlock _ -> count t nal nas nvl nvs nl ns nf nlock (nunlock+1))
  in count trace 0 0 0 0 0 0 0

let print_stat name stat = 
  Printf.printf 
    "%s: \t %d \t %d \t %d \t %d \t %d \t %d \t %d \t %d \t %d\n" 
    (align name 30) 
    stat.no_vload stat.no_vstore
    stat.no_aload stat.no_astore stat.no_load stat.no_store
    stat.no_flush stat.no_lock stat.no_unlock

let print_stats name_unopt stat_unopt name_opt stat_opt =
  Printf.printf "\n%s: \t vload \t vstore  aload \t astore  load \t store \t flush \t lock \t unlock\n" (align "" 30); 
  print_stat name_unopt stat_unopt;
  print_stat name_opt stat_opt;
  flush stdout
  
(* INTERFACE *)
let analyse opt_ir_replay opt_timeout dump opt_real_quiet 
    opt_volatile_only opt_volatile_cached 
    (trace_unopt,trace_opt,st,dwarf) = 
  (* match Trace_parse.parse_log unoptlog with  *)
  (*   | Some (trace_unopt, ir_unopt, ld_unopt, st_unopt, co_unopt) -> *)
  (*     let  Some (trace_opt, ir_opt, _, st, co_opt) = Trace_parse.parse_log_st optlog st_unopt in *)
  (*     let stat_unopt, nev = stats trace_unopt in *)
  (*     let stat_opt, _ = stats trace_opt in *)
  (*     let unoptname = Filename.basename (Filename.chop_extension unoptlog) in *)
  (*     let optname = Filename.basename (Filename.chop_extension optlog) in *)
  (*     let name =        *)
  (*    try String.sub unoptname 0 ((String.length unoptname)-6)  *)
  (*    with _ -> "" in *)
  (*     if nev < 10000  *)
  (*     then begin *)
  (*       let ir_opt =  *)
  (*    if opt_ir_replay then  *)
  (*      let obj = Filename.chop_extension optlog in *)
  (*      List.map (fun x -> x+1) (Irreplay.ir_analysis obj trace_opt)  *)
  (*    else ir_opt in *)
  (*    let outcome = *)
  (*      let f () =  *)
  (*        new_match  opt_write_only  *)
  (*          trace_unopt ir_unopt ld_unopt co_unopt trace_opt ir_opt co_opt st dump in *)
  (*      match opt_timeout with *)
  (*        | None -> Some (f()) *)
  (*        | Some t -> try Some (timeout f (float t)) with Timeout -> None in *)
  (*    if opt_real_quiet  *)
  (*    then begin *)
  (*      match outcome with *)
  (*        | None -> print_char '.' *)
  (*        | Some b -> if b then print_char '+' else print_char '-' *)
  (*    end *)
  (*    else begin *)
  (*      let outcome_string = *)
  (*        match outcome with *)
  (*          | None -> "timeout" *)
  (*          | Some b -> string_of_bool b in *)
  (*      print_stats unoptname stat_unopt optname stat_opt; *)
  (*      Printf.printf "%70s : %s\n" name outcome_string; *)
  (*    end; *)
  (*    flush stdout; *)
  (*    outcome *)
  (*     end else begin *)
  (*    if opt_real_quiet then print_char '.' *)
  (*    else begin *)
  (*      print_stats unoptname stat_unopt optname stat_opt; *)
  (*      Printf.printf "%77s\n" "skipped";  *)
  (*    end; *)
  (*    flush stdout; *)
  (*    None *)
  (*     end *)
  (*   | None -> if opt_real_quiet then print_char '_' else Printf.printf "%77s\n" "skipped (during parsing)"; None *)

  let stat_unopt, nev = stats trace_unopt.events in
  let stat_opt, _ = stats trace_opt.events in

  let unoptname = trace_unopt.executable in
  let optname = trace_opt.executable in
  let name = 
    try String.sub unoptname 0 ((String.length unoptname)-6)
    with _ -> "" in
  if nev < 10000 || opt_volatile_only
  then begin
    let ir_unopt,ld_unopt,co_unopt = 
      trace_unopt.ir_reads, trace_unopt.load_deps, trace_unopt.comp_opts in
    let ir_opt,ld_opt,co_opt = 
      trace_opt.ir_reads, trace_opt.load_deps, trace_opt.comp_opts in
    let ir_unopt =
      if opt_ir_replay then
        let obj = trace_unopt.executable in
        List.map (fun x -> x+1) (Irreplay.ir_analysis obj trace_unopt.events)
      else ir_unopt in
    let ir_opt =
      if opt_ir_replay then
        let obj = trace_opt.executable in
        List.map (fun x -> x+1) (Irreplay.ir_analysis obj trace_opt.events)
      else ir_opt in

    let outcome =
      let f () =
        if opt_volatile_only 
        then
          match_volatile trace_unopt.events co_unopt trace_opt.events co_opt st dwarf dump 
        else if opt_volatile_cached 
        then 
          match_volatile_cached trace_unopt co_unopt trace_opt co_opt st dwarf dump    
        else 
          new_match
            trace_unopt.events ir_unopt ld_unopt co_unopt trace_opt.events ir_opt ld_opt co_opt st dwarf dump in
      match opt_timeout with
        | None -> Some (f())
        | Some t -> try Some (timeout f (float t)) with Timeout -> None in
    if opt_real_quiet
    then begin
      match outcome with
        | None -> print_char '.'
        | Some b -> if b then print_char '+' else print_char '-'
    end
    else begin
      let outcome_string =
        match outcome with
          | None -> "timeout"
          | Some b -> string_of_bool b in
      print_stats unoptname stat_unopt optname stat_opt;
      Printf.printf "%70s : %s\n" name outcome_string;
    end;
    flush stdout;
    outcome
  end else begin
    if opt_real_quiet then print_char '.'
    else begin
      print_stats unoptname stat_unopt optname stat_opt;
      Printf.printf "%77s\n" "skipped";
    end;
    flush stdout;
    None
  end


(* This is used for a fast analysis of atomic accesses - can probably be abandoned in favour of match_spine *)
 
let count_only (unoptlog, optlog) =  (* INTERFACE *)
  ()
  (* match Trace_parse.parse_log unoptlog with *)
  (*   | Some (trace_unopt, ir_unopt, ldunopt, st_unopt, _) -> *)
  (*     let Some (trace_opt, ir_opt, ldopt, st, _) = Trace_parse.parse_log_st optlog st_unopt in  *)
  (*     let rec count_only_int t rsc racq rrlx ssc srel srlx = *)
  (*    match t with *)
  (*      | [] ->  *)
  (*        Printf.printf  *)
  (*          " \t %d \t %d \t %d \t %d \t %d \t %d \n"  *)
  (*          rsc racq rrlx ssc srel srlx; flush stdout; *)
  (*        [rsc;racq;rrlx;ssc;srel;srlx] *)
  (*      | h::t -> *)
  (*        ( match h with *)
  (*          | Load _ | Store _ | Init _ | Flush | Lock _ | Unlock _ ->  *)
  (*            count_only_int t rsc racq rrlx ssc srel srlx *)
  (*          | ALoad (Seq_cst,_,_,_) ->               *)
  (*            count_only_int t (rsc+1) racq rrlx ssc srel srlx  *)
  (*          | ALoad (Acquire,_,_,_) ->               *)
  (*            count_only_int t rsc (racq+1) rrlx ssc srel srlx  *)
  (*          | ALoad (Relaxed,_,_,_) ->               *)
  (*            count_only_int t rsc racq (rrlx+1) ssc srel srlx  *)
  (*          | AStore (Seq_cst,_,_,_) ->              *)
  (*            count_only_int t rsc racq rrlx (ssc+1) srel srlx  *)
  (*          | AStore (Release,_,_,_) ->              *)
  (*            count_only_int t rsc racq rrlx ssc (srel+1) srlx  *)
  (*          | AStore (Relaxed,_,_,_) ->              *)
  (*            count_only_int t rsc racq rrlx ssc srel (srlx+1) ) *)
  (*     in  *)
  (*     Printf.printf "\t rsc \t racq \t rrlx \t ssc \t srel \t srel \n"; *)
  (*     let unopt_num = count_only_int trace_unopt 0 0 0 0 0 0 in *)
  (*     let opt_num = count_only_int trace_opt 0 0 0 0 0 0 in *)
  (*     let tmp_pass = Filename.basename (Filename.chop_extension unoptlog) in *)
  (*     let pass =  *)
  (*    try String.sub tmp_pass 0 ((String.length tmp_pass)-6)  *)
  (*    with _ -> "" in *)
  (*     Printf.printf "%60s : " pass;  *)
  (*     if compare unopt_num opt_num = 0  *)
  (*     then (print_endline "[co] true"; true)  *)
  (*     else (print_endline "[co] false"; false) *)
  (*   | None -> print_endline "skipped (during parsing)"; true *)
  
  

(* stats for the paper - TODO: not robust *)

let count_elim annot_trace_unopt =
  (* count now *)
  let t_c, raw_c, rar_c, ow_c, war_c, waw_c = ref 0, ref 0, ref 0, ref 0, ref 0, ref 0 in
  List.iter (
    fun ae -> 
      t_c := !t_c+1;
      match !(ae.redundant) with
      | RAW    -> raw_c := !raw_c + 1
      | RAR    -> rar_c := !rar_c + 1
      | OW  [] -> ow_c  := !ow_c + 1
      | WAR    -> war_c := !war_c + 1
      | WAW [] -> waw_c := !waw_c + 1 
      | _ ->   t_c := !t_c-1 )
    annot_trace_unopt;
  !t_c, [(RAW, !raw_c); (RAR, !rar_c); (OW [], !ow_c); (WAR, !war_c); (WAW [], !waw_c)]


let record_opt_stats unoptlog uts =   (* INTERFACE *)
  (* match Trace_parse.parse_log unoptlog with *)
  (*   | Some (trace_unopt, ir_unopt, ld_unopt, st, _) -> *)
  (*     let trace_length = List.length trace_unopt in *)
  (*     let filename, source_length = *)
  (*    match uts with  *)
  (*      | None -> *)
  (*        let filename =  *)
  (*          (String.sub (Filename.chop_extension unoptlog) 0 ((String.length (Filename.chop_extension unoptlog))-6) )^".c" in *)
  (*        filename, (count_lines filename)  *)
  (*      | Some u -> "*  ", u in *)
  (*     (\* marking the trace *\)  *)
  (*     let byte_trace_unopt = project_byte st trace_unopt in *)
  (*     let annot_byte_trace_unopt = mark byte_trace_unopt ir_unopt ld_unopt false st in *)
  (*     mark_ow_writes annot_byte_trace_unopt; *)
  (*     mark_merge annot_byte_trace_unopt st; *)
  (*     let annot_trace_unopt = copy_annot trace_unopt annot_byte_trace_unopt in *)
  (*     let tc, elim_stat = count_elim annot_trace_unopt in *)
  (*     let irc = List.length (List.filter (fun i -> !((List.nth annot_trace_unopt i).redundant) = NotRedundant) ir_unopt) in *)
  (*     ( match uts with  *)
  (*    | None -> *)
  (*      Printf.printf "%s : [source: %6d] [trace: %6d] [elim: %6d]" filename source_length trace_length (tc+irc); *)
  (*    | Some _ -> *)
  (*      Printf.printf "%s : [opt_r : %5d" filename (trace_length*100/source_length); *)
  (*      print_string "%] "; *)
  (*         Printf.printf "[trace: %6d] [elim: %6d]"  trace_length (tc+irc) ); *)
  (*     Printf.printf " [ratio: %3d" (((tc+irc)*100)/trace_length); print_string "%]"; *)
  (*     List.iter (fun (r,n) -> Printf.printf " [%s: %4d]" (string_of_rr r) n) ((IRR [], irc)::elim_stat); *)

  (*     let entry = { *)
  (*    source_size = source_length; *)
  (*    trace_size = trace_length; *)
  (*    no_eliminable = tc+irc; *)
  (*    no_elim_details = (IRR [], irc)::elim_stat; *)
  (*     } in *)
  (*     Types.stats := entry::!Types.stats; *)
  (*     print_endline ""; *)
  (*     (true, trace_length) *)
  (*   | None -> print_endline "skipped (during parsing)"; *) 
  (true,1)

let average l = 
  let rec aux l s t =
    match l with
      | [] -> s / t
      | f::r -> aux r (f+s) (t+1)
  in aux l 0 0

let median l =
  let l = List.sort (compare) l in
  let s = List.length l in
  if s mod 2 = 0 then ((List.nth l ((s/2)-1)) + (List.nth l  (s/2))) /2 
  else List.nth l (s/2)

let std_dev l a =
  let a = float a in
  let rec aux l s t =
    match l with
      | [] -> sqrt (s /. (float (t-1)))
      | f::r -> aux r (((float f)-.a)*.((float f)-.a) +. s) (t+1)
 in aux l 0. 0

let max l =
  let rec aux l m =
    match l with
      | f::r -> if f > m then aux r f else aux r m
      | [] -> m
  in aux l 0

let av_std_dev l = 
  let a = average l in
  let d = median l in 
  let m = max l in
  let s = std_dev l a in
  Printf.sprintf "%4d (%4d) (%4d) (%4.2f)" a d m s

let stats_process () =
  print_endline "*****************";
  Printf.printf "[avg. source   : %s]\n" (av_std_dev (List.map (fun e -> e.source_size) !Types.stats));
  Printf.printf "[avg. trace    : %s]\n" (av_std_dev (List.map (fun e -> e.trace_size) !Types.stats));
  Printf.printf "[avg. elim     : %s]\n" (av_std_dev (List.map (fun e -> e.no_eliminable) !Types.stats));

  List.iter (fun r ->
    Printf.printf "[avg. %s    : %s]\n" (string_of_rr r) (av_std_dev (List.map (fun e -> (List.assoc r e.no_elim_details)) !Types.stats)))
    [IRR[];RAW;RAR;OW [];WAR;WAW []]


(*  List.iter (fun x -> Printf.printf "%d " x.trace_size) !Types.stats *)
