(**************************************************************************)
(*                                 Cmmtest                                *)
(*                                                                        *)
(*   Robin Morisset, ENS & INRIA Paris-Rocquencourt                       *)
(*   Pankaj Pawan, IIT Kanpur & INRIA Paris-Rocquencourt                  *)
(*   Francesco Zappa Nardelli, INRIA Paris-Rocquencourt                   *)
(*                                                                        *)
(*  The Cmmtest tool is copyright 2012, 2013 Institut National de         *)
(*  Recherche en Informatique et en Automatique (INRIA).                  *)
(*                                                                        *)
(*  Redistribution and use in source and binary forms, with or without    *)
(*  modification, are permitted provided that the following conditions    *)
(*  are met:                                                              *)
(*  1. Redistributions of source code must retain the above copyright     *)
(*  notice, this list of conditions and the following disclaimer.         *)
(*  2. Redistributions in binary form must reproduce the above copyright  *)
(*  notice, this list of conditions and the following disclaimer in the   *)
(*  documentation and/or other materials provided with the distribution.  *)
(*  3. The names of the authors may not be used to endorse or promote     *)
(*  products derived from this software without specific prior written    *)
(*  permission.                                                           *)
(*                                                                        *)
(*  THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS    *)
(*  OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED     *)
(*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE    *)
(*  ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY       *)
(*  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL    *)
(*  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE     *)
(*  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS         *)
(*  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER  *)
(*  IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR       *)
(*  OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN   *)
(*  IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                         *)
(*                                                                        *)
(**************************************************************************)

open Util

(* debug *)

let debug s =
  if true then print_endline s

(* types *)

type ptr = int

type dw_unknown =
    { unk_at : ptr }

type dw_variable =
    { var_at    : ptr;
      var_name  : string;
      var_typ   : ptr }

type dw_pointer_type =
    { ptr_at    : ptr;
      ptr_size  : int }

type dw_array_type =
    { arr_at    : ptr;
      arr_typ   : ptr }

type dw_subrange_type =
    { sub_at    : ptr;
      sub_typ   : ptr;
      sub_upp_b : int }

type dw_base_type =
    { typ_at    : ptr;
      typ_size  : int }

type dw_structure_type = 
    { str_at    : ptr;
      str_size  : int }

type dw_member = 
    { mem_at    : ptr;
      mem_name  : string;
      mem_type  : ptr;
      mem_loc   : int }

type dw_typedef =
    { tdf_at    : ptr;
      tdf_typ   : ptr }

type dw_volatile =
    { vol_at    : ptr;
      vol_typ   : ptr }

type node =
  | DW_TAG_variable of dw_variable
  | DW_TAG_pointer_type of dw_pointer_type
  | DW_TAG_base_type of dw_base_type
  | DW_TAG_array_type of dw_array_type
  | DW_TAG_subrange_type of dw_subrange_type
  | DW_TAG_structure_type of dw_structure_type
  | DW_TAG_member of dw_member
  | DW_TAG_typedef of dw_typedef
  | DW_TAG_volatile of dw_volatile
  | DW_TAG_unknown of dw_unknown

let dump_node n =
  match n with
    | DW_TAG_variable v ->
	Printf.printf "{ <%x> [VAR] name: %s; typ: <%x> }\n" 
	  v.var_at v.var_name v.var_typ
    | DW_TAG_pointer_type p ->
	Printf.printf "{ <%x> [PTR] size: %d }\n" p.ptr_at p.ptr_size 
    | DW_TAG_base_type t ->
	Printf.printf "{ <%x> [TYP] size: %d }\n" t.typ_at t.typ_size 
    | DW_TAG_array_type a ->
	Printf.printf "{ <%x> [ARR] typ: <%x> }\n" a.arr_at a.arr_typ
    | DW_TAG_subrange_type s ->
	Printf.printf "{ <%x> [SUB] typ: <%x>; upp_b: %d }\n" 
	  s.sub_at s.sub_typ s.sub_upp_b
    | DW_TAG_structure_type s ->
	Printf.printf "{ <%x> [STR] size: %d }\n" s.str_at s.str_size 
    | DW_TAG_member m ->
	Printf.printf "{ <%x> [MEM] name: %s; typ: <%x>; loc: %d }\n" 
	  m.mem_at m.mem_name m.mem_type m.mem_loc
    | DW_TAG_typedef t ->
	Printf.printf "{ <%x> [TDF] typ: %d }\n" t.tdf_at t.tdf_typ 
    | DW_TAG_volatile v ->
	Printf.printf "{ <%x> [VOL] typ: %d }\n" v.vol_at v.vol_typ 
    | DW_TAG_unknown u ->
	Printf.printf "{ <%x> [UNK] }\n" u.unk_at

type type_tag =
  | TBase of (* size *) int
  | TArr of (* size of all sub-arrays *) int list * (* tag of the elements *) type_tag
  | TStr of (* in progress *) (string * int * type_tag) list

let rec dump_type_tag tt =
  match tt with
    | TBase s -> Printf.sprintf "base type or pointer of size: %d" s
    | TArr (l,_) -> 
      Printf.sprintf "array type, sizes: %s" 
	(String.concat " - " (List.map string_of_int l))
    | TStr l -> "structure\n" ^
      (String.concat "\n" 
        (List.map (fun (s,o,t) -> Printf.sprintf "  <%s: %d, %s>" s o (dump_type_tag t)) l))

(* auxiliary functions *)

let at n =
  match n with
  | DW_TAG_variable v -> v.var_at
  | DW_TAG_pointer_type p -> p.ptr_at
  | DW_TAG_base_type b -> b.typ_at
  | DW_TAG_array_type a -> a.arr_at
  | DW_TAG_subrange_type s -> s.sub_at
  | DW_TAG_structure_type s -> s.str_at
  | DW_TAG_member m -> m.mem_at
  | DW_TAG_typedef t -> t.tdf_at
  | DW_TAG_volatile v -> v.vol_at
  | DW_TAG_unknown u-> u.unk_at 

let is_global_variable n =
  match n with
    | DW_TAG_variable v -> v.var_name.[0] = 'g' || v.var_name.[0] = 'a'
    | _ -> false

let find_node_at dwarf ptr =
  List.find (fun n -> at n = ptr) dwarf

let rec find_next_node dwarf ptr =
  match dwarf with
    | [] -> error "find_next_node: not found"
    | h::t -> 
      if at h = ptr then 
	try List.hd t with _ -> error "find_next_node: hd empty"
      else find_next_node t ptr

let rec find_upp_b_subs dwarf ptr =
  let rec find_upp_b_subs_internal dwarf =
    match dwarf with
      | [] -> []
      | h::t -> 
	( match h with
	  | DW_TAG_subrange_type s -> s.sub_upp_b::(find_upp_b_subs_internal t)
	  | _ -> [] ) in
  match dwarf with
    | [] -> error "find_upp_b_subs: not found"
    | h::t -> 
      if at h = ptr then find_upp_b_subs_internal t
      else find_upp_b_subs t ptr

let rec find_mems dwarf ptr =
  let rec find_mems_internal dwarf =
    match dwarf with
      | [] -> []
      | h::t -> 
	( match h with
	  | DW_TAG_member m -> (m.mem_name, m.mem_type, m.mem_loc)::(find_mems_internal t)
	  | _ -> [] ) in

  match dwarf with
    | [] -> error "find_mems: not found"
    | h::t -> 
      if at h = ptr then find_mems_internal t
      else find_mems t ptr

(* parsing *)

let remove_paren s =
  String.sub s 1 ((String.length s)-2)

let is_node_start l =
  if String.length l > 0 && l.[1] = '<' then true else false

let rec skip_until_node_start fd = 
  try
    let l = input_line fd in
    if is_node_start l then Some l else skip_until_node_start fd
  with End_of_file -> ignore (Unix.close_process_in fd); None

let decompose_node_line at l =
  try
    let tag = 
      let il = (String.index l '>')+1 in
      let ir = String.index l ':' in
      String.trim (String.sub l il (ir-il)) in
    let value = 
      let il = (String.index l ':')+1 in
      String.trim (String.sub l il (String.length l-il)) in
    (tag,value)
  with Not_found -> error (Printf.sprintf "cannot decompose line: %s : at <%x>" l at)
    
let parse_name s =
  if s.[0] = '(' 
  then 
    (* clang adds some extra info we skip *)
    let il = (String.index_from s ((String.index s ':')+1) ':')+1 in
    String.trim (String.sub s il ((String.length s)-il))
  else s

let parse_variable fd at = 
  let name, typ = ref "", ref "" in
  let rec aux () =
    try
      let t1,v1 = decompose_node_line at (input_line fd) in
	match t1 with
	  | "DW_AT_name" -> name := parse_name v1; aux ()
	  | "DW_AT_type" -> typ := v1
	  | _ -> aux ()
    with End_of_file -> error (Printf.sprintf "malformed variable at: %x" at) in 
  aux ();
  let nl = skip_until_node_start fd in
  { var_at = at; var_name = !name; var_typ = int_of_string (remove_paren !typ) }, nl

let parse_pointer_type fd at =
  try
    let t1,v1 = decompose_node_line at (input_line fd) in
    let nl = skip_until_node_start fd in
    match t1 with
      | "DW_AT_byte_size" -> { ptr_at = at; ptr_size = int_of_string v1 }, nl
      | _ -> 
	  (* clang does not dump the byte_size field for pointers, assuming 8 *)
	  { ptr_at = at; ptr_size = 8 }, nl 
  with End_of_file -> error (Printf.sprintf "malformed pointer_type at: %x" at)

let parse_structure_type fd at =
  let rec aux () = 
    try
      let t1,v1 = decompose_node_line at (input_line fd) in
      match t1 with
	| "DW_AT_byte_size" -> 
	  let nl = skip_until_node_start fd in
	  { str_at = at; str_size = int_of_string v1 }, nl
	| _ ->  aux ()
    with End_of_file -> error (Printf.sprintf "malformed structure_type at: %x" at)
  in aux ()

let parse_member fd at =
  let name, typ = ref "", ref "" in
  let rec aux () =
    try
      let t,v = decompose_node_line at (input_line fd) in
	match t with
	  | "DW_AT_name" -> name := v; aux ()
	  | "DW_AT_type" -> typ := v; aux ()
	  | "DW_AT_data_member_location" -> 	  
	    let v1 = 
	      let il = String.index v '(' in String.sub v il ((String.index v ')') - il) in
	    let v2 = 
	      let il = String.index v1 ':' + 1in String.sub v1 il ((String.length v1) - il) in
	      { mem_at = at; mem_name = (parse_name !name); 
		mem_type = int_of_string (remove_paren !typ); 
		mem_loc = int_of_string (String.trim v2) }, 
	      skip_until_node_start fd
	  | _ -> aux ()
    with End_of_file -> error (Printf.sprintf "malformed member_type at: %x" at)
  in aux ()
    
let parse_base_type fd at =
  let rec aux () =
    try
      let t1,v1 = decompose_node_line at (input_line fd) in
        match t1 with 
	  | "DW_AT_byte_size" -> { typ_at = at; typ_size = int_of_string v1 }, skip_until_node_start fd
	  | _ -> aux ()
    with End_of_file -> error (Printf.sprintf "malformed base_type at: %x" at) 
  in aux ()

let parse_array_type fd at =
  try
    let t1,v1 = decompose_node_line at (input_line fd) in
    let nl = skip_until_node_start fd in
    match t1 with
      | "DW_AT_type" -> { arr_at = at; arr_typ = int_of_string (remove_paren v1) }, nl
      | _ -> error (Printf.sprintf "malformed array_type at: %x (1)" at)
  with End_of_file -> error (Printf.sprintf "malformed array_type at: %x (2)" at)

let parse_subrange_type fd at =
  try
    let t1,v1 = decompose_node_line at (input_line fd) in
    let t2,v2 = decompose_node_line at (input_line fd) in
    let nl = skip_until_node_start fd in
    match t1,t2 with
      | "DW_AT_type", "DW_AT_upper_bound" -> { sub_at = at; sub_typ = int_of_string (remove_paren v1); sub_upp_b = int_of_string v2 }, nl
      | _ -> error (Printf.sprintf "malformed array_type at: %x (1)" at)
  with End_of_file -> error (Printf.sprintf "malformed array_type at: %x (2)" at)

let parse_typedef fd at =
  let rec aux () =
    try
      let t1,v1 = decompose_node_line at (input_line fd) in
      match t1 with
	| "DW_AT_type" -> 
	  let nl = skip_until_node_start fd in
	  { tdf_at = at; tdf_typ = int_of_string (remove_paren v1) }, nl
	| _ -> aux ()
    with End_of_file -> error (Printf.sprintf "malformed typedef at: %x" at) in
  aux ()

let parse_volatile fd at =
  let rec aux () =
    try
      let t1,v1 = decompose_node_line at (input_line fd) in
      match t1 with
	| "DW_AT_type" -> 
	  let nl = skip_until_node_start fd in
	  { vol_at = at; vol_typ = int_of_string (remove_paren v1) }, nl
	| _ -> aux ()
    with End_of_file -> error (Printf.sprintf "malformed volatile at: %x" at) in
  aux ()

let parse_node fd l (* : node * string *) = 
  let at = 
    let il = (String.index_from l (String.index l '>') '<')+1 in
    let ir = String.index_from l ((String.index l '>')+1) '>' in
      int_of_string ("0x"^String.sub l il (ir-il)) in
  let kind = 
    let il = (String.index l '(')+1 in
    let ir = String.index l ')' in
    String.sub l il (ir-il) in
  match kind with
    | "DW_TAG_variable" -> 
	let v,nl = parse_variable fd at in
	let node = DW_TAG_variable v in
	  node, nl
    | "DW_TAG_pointer_type" -> 
	let p,nl = parse_pointer_type fd at in
	let node = DW_TAG_pointer_type p in
	  node, nl
    | "DW_TAG_base_type" -> 
	let p,nl = parse_base_type fd at in
	let node = DW_TAG_base_type p in
	  node, nl
    | "DW_TAG_array_type" -> 
	let p,nl = parse_array_type fd at in
	let node = DW_TAG_array_type p in
	  node, nl
    | "DW_TAG_subrange_type" -> 
	let p,nl = parse_subrange_type fd at in
	let node = DW_TAG_subrange_type p in
	  node, nl
    | "DW_TAG_structure_type" -> 
	let p,nl = parse_structure_type fd at in
	let node = DW_TAG_structure_type p in
	  node, nl
    | "DW_TAG_member" -> 
	let p,nl = parse_member fd at in
	let node = DW_TAG_member p in
	  node, nl
    | "DW_TAG_typedef" -> 
	let p,nl = parse_typedef fd at in
	let node = DW_TAG_typedef p in
	  node, nl
    | "DW_TAG_volatile_type" -> 
	let p,nl = parse_volatile fd at in
	let node = DW_TAG_volatile p in
	  node, nl
    | _ -> DW_TAG_unknown { unk_at = at }, (skip_until_node_start fd)

(* main *)
  
let rec extract_size_base_type dwarf n =
  match n with
    | DW_TAG_base_type b -> b.typ_size
    | DW_TAG_pointer_type p -> p.ptr_size
    | DW_TAG_structure_type s -> s.str_size (* CHECKME *)
    | DW_TAG_typedef t -> extract_size_base_type dwarf (find_node_at dwarf t.tdf_typ)
    | DW_TAG_volatile v -> extract_size_base_type dwarf (find_node_at dwarf v.vol_typ)
    | _ -> dump_node n; error ("-> extract_size_base_type, unexpected node")

let rec compute_type_tag dwarf typ : type_tag =
  match find_node_at dwarf typ with 
    | DW_TAG_variable _ -> error "compute_type_tag of var"
    | DW_TAG_pointer_type p -> TBase p.ptr_size
    | DW_TAG_base_type b -> TBase b.typ_size
    | DW_TAG_array_type a -> 
      let element_size = extract_size_base_type dwarf (find_node_at dwarf a.arr_typ) in
      let array_size_list = List.map (fun x -> x+1) (find_upp_b_subs dwarf typ) in
      let rec internal l a =
	match l with
	  | h::t -> (h*a)::(internal t (h*a))
	  | [] -> [] in
      TArr 
	(List.rev (internal (1::(List.rev array_size_list)) element_size),
         compute_type_tag dwarf a.arr_typ)
    | DW_TAG_subrange_type _ -> error "compute_type_tag of subrange"
    | DW_TAG_structure_type s ->
      let mems = find_mems dwarf typ in
      let mems_tt = List.map (fun (n,t,o) -> (n,o,compute_type_tag dwarf t)) mems in 
      TStr mems_tt
    | DW_TAG_member _ -> error "compute_type_tag of member"
    | DW_TAG_typedef t -> compute_type_tag dwarf t.tdf_typ
    | DW_TAG_volatile v -> compute_type_tag dwarf v.vol_typ
    | DW_TAG_unknown u -> error "compute_type_tag of unknown"
 
let dump_global_variable_infos dwarf n =
  match n with
    | DW_TAG_variable v -> 
      Printf.printf "%s %s\n" 
	v.var_name (dump_type_tag (compute_type_tag dwarf v.var_typ))
    | _ -> 
      error "cannot happen: dump gv infos internal"

(* inverse functions *)

let rec resolve_address type_info base addr =
  match type_info with
    | TArr (sl,tt) -> 
      begin
	let sub_arrays_size = List.tl sl in
	let offset = addr - base in
	let residual_offset = ref 0 in
	let rec aux sas offset =
	  match sas with
	    | [] -> []
	    | h::t -> 
	      residual_offset := offset mod h;
	      (offset/h)::(aux t (offset mod h)) in
	let il = aux sub_arrays_size offset in
	"["^(String.concat "][" (List.map string_of_int il)) ^"]"
	^ (resolve_address tt 0 !residual_offset)
      end
    | TStr sl ->
      let offset = addr - base in
      let (field,off,typ) = List.find (fun (_,o,_) -> o <= offset)
	(List.sort (fun (_,o1,_) (_,o2,_) -> o2 - o1) sl) in
      let suffix = resolve_address typ 0 (offset - off) in
      
      "."^field^suffix
    | _ -> ""
      
(* main *)

let _ =
  (* command line *)
  let files = ref [] in 
  Arg.parse [] (fun s -> files := s::!files)
    "usage: dwarfparse <file>";
  if !files = [] then error  "usage: dwarfparse <file>";
  let executable = List.hd !files in
  (* main *)
  let fd = 
    try Unix.open_process_in ("readelf -wi "^executable) 
    with _ -> error ("readelf -wi "^executable^ "failed.") in
  let l = 
    match skip_until_node_start fd with
      | Some l -> l
      | None -> error ("could not find the first node")
  in 
  let rec aux fd l dwarf =
    let node, nl = parse_node fd l in
    if nl = None then node::dwarf else aux fd (the nl) (node::dwarf)in
  let dwarf = List.rev (aux fd l []) in
  List.iter dump_node dwarf;

  (* for each variable g_ or a_, dump the total size and size of each element *)
  print_newline ();
  List.iter 
    (fun n -> if is_global_variable n then dump_global_variable_infos dwarf n else ()) 
    dwarf;

  (* debug of inverse functions *)
  print_newline ();
  print_endline (resolve_address (TArr ([40;4],TBase 4)) 0 12);
  print_endline (resolve_address (TArr ([200;20;4],TBase 4)) 0 0);
  print_endline (resolve_address (TArr ([200;20;4],TBase 4)) 0 52);
  print_endline (resolve_address (TStr [("f1",0,TBase 4);("f2",8,TBase 8);("f3",16,TBase 1)]) 0 0);
  print_endline (resolve_address (TStr [("f1",0,TBase 4);("f2",8,TBase 8);("f3",16,TBase 1)]) 0 8);
  print_endline (resolve_address (TStr [("f1",0,TBase 4);("f2",8,TBase 8);("f3",16,TBase 1)]) 0 16);


  print_endline (resolve_address
    (TStr [("f1",0,TBase 4);("f2",8,TArr ([4;1],TBase 1));("f3",16,TBase 1)]) 
    0 9);

  print_endline (resolve_address (TArr ([200;20;4],(TStr [("f1",0,TBase 4)]))) 0 52);

  print_endline (resolve_address (TArr ([200;20;4],(TStr [("f1",0,TBase 2);("f2",2,TBase 2)]))) 0 54)







