[Shootout-list] fasta (OCaml)

Brent Fulgham bfulg@pacbell.net
Sat, 5 Mar 2005 16:40:57 -0800


Applied, thanks.

On Mar 4, 2005, at 2:04 AM, ChriS wrote:

> (* http://shootout.alioth.debian.org/benchmark.php?test=fasta&lang=all 
> *)
> (* This version is only marginally faster (2%) -- but less elegant --
>    than the HOF one. *)
>
> (* Random number generator (Shootout version) *)
> let rec gen_random =
>   let im = 139968
>   and ia = 3877
>   and ic = 29573 in
>   let last = ref 42
>   and inv_im = 1. /. float im in
>   fun max ->
>     last := (!last * ia + ic) mod im;
>     max *. float !last *. inv_im
>
>
> let make_cumulative =
>   let rec cumul prob = function
>     | [] -> []
>     | (c, p) :: tl -> let prob = prob +. p in (c, prob) :: cumul prob 
> tl  in
>   fun table -> cumul 0. table
>
> let rec rand_char (prob : float) = function
>   | [] -> failwith "rand_c"
>   | (c, p) :: tl -> if prob <= p then c else rand_char prob tl
>
> let width = 60
>
> (* [write s i0 l w] outputs [w] chars of [s.[0 .. l]] starting with
>    [s.[i0]] and considering the substring [s.[0 .. l]] as a "circle".
>    One assumes [0 <= i0 <= l <= String.length s].
>    @return [i0] needed for subsequent writes.  *)
> let rec write s i0 l w =
>   let len = l - i0 in
>   if w <= len then (output stdout s i0 w; i0 + w)
>   else (output stdout s i0 len; write s 0 l (w - len))
>
> let make_repeat_fasta id desc src n =
>   Printf.printf ">%s %s\n" id desc;
>   let i0 = ref 0
>   and l = String.length src in
>   for i = 1 to n / width do
>     i0 := write src !i0 l width;
>     print_char '\n';
>   done;
>   i0 := write src !i0 l (n mod width);
>   print_char '\n'
>
> let make_random_fasta id desc table n =
>   Printf.printf ">%s %s\n" id desc;
>   let table = make_cumulative table in
>   for i = 1 to n / width do
>     for j = 1 to width do print_char(rand_char (gen_random 1.) table); 
> done;
>     print_char '\n';
>   done;
>   for j = 1 to n mod width do
>     print_char(rand_char (gen_random 1.) table);
>   done;
>   print_char '\n'
>
>
>
> let alu = "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG\
> GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA\
> CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT\
> ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA\
> GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG\
> AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC\
> AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"
>
> let iub = [
>   ('a', 0.27);	('c', 0.12);	('g', 0.12);	('t', 0.27);
>
>   ('B', 0.02);	('D', 0.02);	('H', 0.02);	('K', 0.02);
>   ('M', 0.02);	('N', 0.02);	('R', 0.02);	('S', 0.02);
>   ('V', 0.02);	('W', 0.02);	('Y', 0.02);
> ]
>
> let homosapiens = [
>     ('a', 0.3029549426680);    ('c', 0.1979883004921);
>     ('g', 0.1975473066391);    ('t', 0.3015094502008);
> ]
>
> let () =
>   let n = int_of_string Sys.argv.(1) in
>   make_repeat_fasta "ONE" "Homo sapiens alu" alu (n*2);
>   make_random_fasta "TWO" "IUB ambiguity codes" iub (n*3);
>   make_random_fasta "THREE" "Homo sapiens frequency" homosapiens (n*5)