Adaptacyjne kodowanie Huffmana wg. Vittera

0

Witam

Mam pewien problem z implementacją algorytmu Vittera tzn. próbuję zrozumieć
jak działa jego implementacja w oparciu o floating tree. Rozumiem ogólną
koncepcję tego algorytmu, ale już jego implementacja z uwagi na niezbyt dokładne komentarze autora przysparza
trochę kłopotów. Poniżej zamieszczam kawałek kodu w Ocamlu (ale zapis czysto imperatywny ;-) ), który
powinien zakodować strumień liczb 0...2^16 - 1 zadany w postaci strumienia (Lazy list) typu
'a stream = EOS | Cons of 'a * 'a stream Lazy.t;;

let n = 65536;; (* Size of an input alphabet (we work with numbers 0..2^16-1. *)
let z = 2 * n - 1;;                               (* Maximum number of nodes. *)
let mis = ref 0   (* Number of zero-weight letters (missing) in the alphabet. *)
and exp = ref 0           (* An exponent in the expression mis = 2^exp + rem. *)
and rem = ref 0           (* A remainder in the expression mis = 2^exp + rem. *)
and alpha = Array.make (n + 1) 0                   (* alpha[#node] = #letter. *)
and rep = Array.make (n + 1) 0                       (* rep[#letter] = #node. *)
and available_block = ref 0 (* Number of irst available block or 0 otherwise. *)
and block = Array.make (z + 1) 0              (* block[#node] = block number. *)
and weight = Array.make (z + 1) 0    (* Weight of each node in a given block. *)
and parent = Array.make (z + 1) 0 (* Parent of block's leader or 0 otherwise. *)
and parity = Array.make (z + 1) 0 (* 0 if node is left child or root, 1 othw. *)
and right_child = Array.make (z + 1) 0  (* The right child of block's leader. *)
and first = Array.make (z + 1) 0              (* The leader of a given block. *)
and last = Array.make (z + 1) 0 (* A node with the lowest explicit numbering. *)
and prev_block = Array.make (z + 1) 0 (* The previous block on the cir. list. *)
and next_block = Array.make (z + 1) 0 (* The next block on the circular list. *)
and stack = Array.make (n + 1) 0;;   (* Bits of the enc. of the curr. letter. *)


let initialize =
  mis := 0;
  exp := 0;
  rem := (-1);
  for i = 1 to n do
    mis := !mis + 1;
    rem := !rem + 1;
    if 2 * !rem = !mis then
      begin
        exp := !exp + 1;
        rem := 0
      end;
    alpha.(i) <- i;
    rep.(i) <- i;
  done;
  block.(n) <- 1;
  prev_block.(1) <- 1;
  next_block.(1) <- 1;
  weight.(1) <- 0;
  first.(1) <- n;
  last.(1) <- n;
  parity.(1) <- 0;
  parent.(1) <- 0;
  available_block := 2;
  for i = !available_block to z - 1 do
    next_block.(i) <- (i + 1);
  done;
  next_block.(z) <- 0;;


let swap_leaves lhs rhs =
  rep.(alpha.(lhs)) <- rhs;
  rep.(alpha.(rhs)) <- lhs;
  let tmp = alpha.(lhs)
  in
    begin
      alpha.(lhs) <- alpha.(rhs);
      alpha.(rhs) <- tmp
    end;;



let find_child j parity =
  let delta = 2 * (first.(block.(j)) - j) + 1 - parity
  and right = right_child.(block.(j))
  in
    let gap = right - last.(block.(right))
    in
      if delta <= gap then right - delta
      else
        let delta = delta - gap - 1
        and right = first.(prev_block.(block.(right)))
        in
          let gap = right - last.(block.(right))
          in
            if delta <= gap then right - delta
            else first.(prev_block.(block.(right))) - delta + gap + 1;;


let update k =
  let q = ref 0 and leaf_to_inc = ref 0 and bq = ref 0 and b = ref 0
  and old_parent = ref 0 and old_parity = ref 0 and nbq = ref 0 and par = ref 0
  and bpar = ref 0 and slide = ref false
  in
    let find_node =
      q := rep.(k);
      leaf_to_inc := 0;
      if !q <= !mis then
        begin
          swap_leaves !q !mis;
          if !rem = 0 then
            begin
              rem := (!mis / 2);
              if !rem > 0 then
                exp := !exp - 1
            end;
          mis := !mis - 1;
          rem := !rem - 1;
          q := !mis + 1;
          bq := block.(!q);
          if !mis > 0 then
            begin
              block.(!mis) <- !bq;
              last.(!bq) <- !mis;
              old_parent := parent.(!bq);
              parent.(!bq) <- !mis + n;
              parity.(!bq) <- 1;
              b := !available_block;
              available_block := next_block.(!available_block);
              prev_block.(!b) <- !bq;
              next_block.(!b) <- next_block.(!bq);
              prev_block.(next_block.(!bq)) <- !b;
              next_block.(!bq) <- !b;
              parent.(!b) <- !old_parent;
              parity.(!b) <- 0;
              right_child.(!b) <- !q;
              block.(!mis + n) <- !b;
              weight.(!b) <- 0;
              first.(!b) <- (!mis + n);
              last.(!b) <- (!mis + n);
              leaf_to_inc := !q;
              q := !mis + n
            end
        end
      else
        begin
          swap_leaves !q first.(block.(!q));
          q := first.(block.(!q));
          if ((!q = !mis + 1) && (!mis > 0)) then
            begin
              leaf_to_inc := !q;
              q := parent.(block.(!q))
            end
        end
    and slide_and_increment =
      bq := block.(!q);
      nbq := next_block.(!bq);
      par := parent.(!bq);
      old_parent := !par;
      old_parity := parity.(!bq);
      if ( ((!q <= n) && (first.(!nbq) > n) && (weight.(!nbq) = weight.(!bq)))
           || ((!q > n) && (first.(!nbq) <= n) &&
               (weight.(!nbq) = weight.(!bq) + 1)))  then
        begin
          slide := true;
          old_parent := parent.(!nbq);
          old_parity := parity.(!nbq);
          if !par > 0 then
            begin
              bpar := block.(!par);
              if right_child.(!bpar) = !q then
                right_child.(!bpar) <- last.(!nbq)
              else if right_child.(!bpar) = first.(!nbq) then
                right_child.(!bpar) <- !q
              else
                right_child.(!bpar) <- right_child.(!bpar) + 1;
              if !par <> z then
                if block.(!par + 1) <> !bpar then
                  if right_child.(block.(!par + 1)) = first.(!nbq) then
                    right_child.(block.(!par + 1)) <- !q
                  else if block.(right_child.(block.(!par + 1))) = !nbq then
                    right_child.(block.(!par + 1)) <-
                      (right_child.(block.(!par + 1)) + 1)
            end;
          parent.(!nbq) <- parent.(!nbq) - 1 + parity.(!nbq);
          parity.(!nbq) <- (1 - parity.(!nbq));
          nbq := next_block.(!nbq)
        end
      else
        slide := false;
      if ( (((!q <= n) && (first.(!nbq) <= n)) ||
            ((!q > n) && (first.(!nbq) > n)))
           && (weight.(!nbq) = weight.(!bq) + 1) ) then
        begin
          block.(!q) <- !nbq;
          last.(!nbq) <- !q;
          if last.(!bq) = !q then
            begin
              next_block.(prev_block.(!bq)) <- next_block.(!bq);
              prev_block.(next_block.(!bq)) <- prev_block.(!bq);
              next_block.(!bq) <- !available_block;
              available_block := !bq
            end
          else
            begin
              if !q > n then
                right_child.(!bq) <- (find_child (!q - 1) 1);
              if parity.(!bq) = 0 then
                parent.(!bq) <- parent.(!bq) - 1;
              parity.(!bq) <- 1 - parity.(!bq);
              first.(!bq) <- (!q - 1)
            end
        end
      else if last.(!bq) = !q then
        begin
          if !slide then
            begin
              prev_block.(next_block.(!bq)) <- prev_block.(!bq);
              next_block.(prev_block.(!bq)) <- next_block.(!bq);
              prev_block.(!bq) <- prev_block.(!nbq);
              next_block.(!bq) <- !nbq;
              prev_block.(!nbq) <- !bq;
              next_block.(prev_block.(!bq)) <- !bq;
              parent.(!bq) <- !old_parent;
              parity.(!bq) <- !old_parity
            end;
          weight.(!bq) <- (weight.(!bq) + 1)
        end
      else
        begin
          b := !available_block;
          available_block := next_block.(!available_block);
          block.(!q) <- !b;
          first.(!b) <- !q;
          last.(!b) <- !q;
          if !q > n then
            begin
              right_child.(!b) <- right_child.(!bq);
              right_child.(!bq) <- (find_child (!q - 1) 1);
              if right_child.(!b) = (!q - 1) then
                parent.(!bq) <- !q
              else if parity.(!bq) = 0 then
                parent.(!bq) <- (parent.(!bq) - 1)
            end
          else if parity.(!bq) = 0 then
            parent.(!bq) <- (parent.(!bq) - 1);
          first.(!bq) <- (!q - 1);
          parity.(!bq) <- (1 - parity.(!bq));
          prev_block.(!b) <- prev_block.(!nbq);
          next_block.(!b) <- !nbq;
          prev_block.(!nbq) <- !b;
          next_block.(prev_block.(!b)) <- !b;
          weight.(!b) <- weight.(!bq) + 1;
          parent.(!b) <- !old_parent;
          parity.(!b) <- !old_parity;
        end;
      q := if !q <= n then !old_parent else !par
    in
      begin
        find_node;
        while !q > 0 do
          Printf.printf "update %d\n" !q;
          slide_and_increment;
        done;
        if !leaf_to_inc <> 0 then
          begin
            q := !leaf_to_inc;
            slide_and_increment
          end
      end;;


let rec vencode s =
  match s with
  | Cons (h, t) ->
      let q = ref (rep.(h + 1))
      and i = ref 0
      in
        begin
          if !q < !mis then
            begin
              q := !q - 1;
              let t = (if !q < 2 * !rem then !exp + 1 else begin q:= !q - !rem; !exp end)
              in
                for k = 1 to t do
                  i := !i + 1;
                  stack.(!i) <- (!q mod 2);
                  q := (!q / 2)
                done;
              q := !mis
            end;
          let root = (if !mis = n then n else z)
          in
            while !q <> root do
              i := !i + 1;
              stack.(!i) <- ( (first.(block.(!q)) - (!q) + parity.(block.(!q))) mod 2 );
              q := parent.(block.(!q)) - ( (first.(block.(!q)) - (!q) + 1 - parity.(block.(!q))) / 2 )
            done;
          for k = !i downto 1 do
            Printf.printf "%d" stack.(k)
          done;
          Printf.printf "\n";
        end;
      update (h + 1);
      vencode (Lazy.force t);
  | EOS -> Printf.printf "[the end]\n";;

Byłbym wdzięczny za pomysły dlaczego procedura update się zapętla. W ogólnej koncepcji pętla while w proc. update
działa dopóki q nie stanie się korzeniem drzewa, ale w kodzie Vitter napisał while !q >0 do... Przecież węzeł q stając się
w każdej iteracji liderem swojego bloku powinien iść do góry do korzenia. Próbowałem zmienić ten warunek na

let root = (if !mis < n then 2 * n - 1 else n) (* Jeżeli jest tylko 1 węzeł to n jest korzeniem *)
in while !q <> root do ...

ale to też nie zadziałało.

Pozdrawiam

Bazyli

0

Sorki, głupi błąd. W swojej publikacji J. Vitter w procedurze SlideAndIncrement zapomniał o przypadku, gdy dany ani (p jest liściem i nast. blok zawiera węzły wewn. o tej samej wadze co p)
ani (p nie jest węzł. wewn. o wadze wt. a następy blok to blok liści o wadze wt + 1). Z tego co widzę chyba w przeciwnym przypadku należy iść dalej do korzenia i zwiększać o jeden.
Nie było tego ani w jego pierwszej teoretycznej publikacji porównującej swój al. z FGK ani tej drugiej z impl. w Pascalu. Dziwi mnie to, bo skoro opublikował impl. to czy jej w ogóle nie testował?

1 użytkowników online, w tym zalogowanych: 0, gości: 1