[Shootout-list] fasta (OCaml)
Brent Fulgham
bfulg@pacbell.net
Sat, 5 Mar 2005 16:40:57 -0800
Applied, thanks.
On Mar 4, 2005, at 2:04 AM, ChriS wrote:
> (* http://shootout.alioth.debian.org/benchmark.php?test=fasta&lang=all
> *)
> (* This version is only marginally faster (2%) -- but less elegant --
> than the HOF one. *)
>
> (* Random number generator (Shootout version) *)
> let rec gen_random =
> let im = 139968
> and ia = 3877
> and ic = 29573 in
> let last = ref 42
> and inv_im = 1. /. float im in
> fun max ->
> last := (!last * ia + ic) mod im;
> max *. float !last *. inv_im
>
>
> let make_cumulative =
> let rec cumul prob = function
> | [] -> []
> | (c, p) :: tl -> let prob = prob +. p in (c, prob) :: cumul prob
> tl in
> fun table -> cumul 0. table
>
> let rec rand_char (prob : float) = function
> | [] -> failwith "rand_c"
> | (c, p) :: tl -> if prob <= p then c else rand_char prob tl
>
> let width = 60
>
> (* [write s i0 l w] outputs [w] chars of [s.[0 .. l]] starting with
> [s.[i0]] and considering the substring [s.[0 .. l]] as a "circle".
> One assumes [0 <= i0 <= l <= String.length s].
> @return [i0] needed for subsequent writes. *)
> let rec write s i0 l w =
> let len = l - i0 in
> if w <= len then (output stdout s i0 w; i0 + w)
> else (output stdout s i0 len; write s 0 l (w - len))
>
> let make_repeat_fasta id desc src n =
> Printf.printf ">%s %s\n" id desc;
> let i0 = ref 0
> and l = String.length src in
> for i = 1 to n / width do
> i0 := write src !i0 l width;
> print_char '\n';
> done;
> i0 := write src !i0 l (n mod width);
> print_char '\n'
>
> let make_random_fasta id desc table n =
> Printf.printf ">%s %s\n" id desc;
> let table = make_cumulative table in
> for i = 1 to n / width do
> for j = 1 to width do print_char(rand_char (gen_random 1.) table);
> done;
> print_char '\n';
> done;
> for j = 1 to n mod width do
> print_char(rand_char (gen_random 1.) table);
> done;
> print_char '\n'
>
>
>
> let alu = "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG\
> GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA\
> CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT\
> ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA\
> GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG\
> AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC\
> AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"
>
> let iub = [
> ('a', 0.27); ('c', 0.12); ('g', 0.12); ('t', 0.27);
>
> ('B', 0.02); ('D', 0.02); ('H', 0.02); ('K', 0.02);
> ('M', 0.02); ('N', 0.02); ('R', 0.02); ('S', 0.02);
> ('V', 0.02); ('W', 0.02); ('Y', 0.02);
> ]
>
> let homosapiens = [
> ('a', 0.3029549426680); ('c', 0.1979883004921);
> ('g', 0.1975473066391); ('t', 0.3015094502008);
> ]
>
> let () =
> let n = int_of_string Sys.argv.(1) in
> make_repeat_fasta "ONE" "Homo sapiens alu" alu (n*2);
> make_random_fasta "TWO" "IUB ambiguity codes" iub (n*3);
> make_random_fasta "THREE" "Homo sapiens frequency" homosapiens (n*5)