3 / 3
Dec 2006

While trying to solve problem 31 (very fast multiplication) in ocaml I've found myself trying various methods. The only algorithm I know well is the regular arithmetic one that takes O(n^2). I know this won't be fast enough, but I learned some interesting things anyhow. The num library's multiply is pretty fast but it takes more than twice the amount of time than is required by problem 31.

I have tested 3 primary methods:

a) using the built-in Num library (which the server admin kindly added to the compiling command)
b) using the built-in array type and module
c) using the Bigarray module

I tested each with some very large numbers and got some timing results. My conclusion was to not ask the admin to also add bigarray.cmxa. First I'll post the data and then the source code. The input file contained 4 input pairs totaling 18,451 bytes. This was run on a sempron 2400.

I won't post the comparison here, but I noticed that the -unsafe flag did speed up the Array implementation, whereas it did not speed up the Bigarray implementation.

num implementation:

real    0m0.437s
user    0m0.138s
sys     0m0.015s
real    0m0.500s
user    0m0.107s
sys     0m0.015s
real    0m0.453s
user    0m0.139s
sys     0m0.015s

array implementation:

real    0m3.700s
user    0m3.357s
sys     0m0.015s
real    0m3.702s
user    0m3.311s
sys     0m0.015s
real    0m3.695s
user    0m3.342s
sys     0m0.000s

bigarray implementation:

real    0m4.744s
user    0m4.467s
sys     0m0.000s
real    0m4.773s
user    0m4.466s
sys     0m0.015s
real    0m4.758s
user    0m4.482s
sys     0m0.030s

It seems that caml has trouble doing lots of manipulations on either kind of array. Bigarrays are quite a bit slower than regular arrays however. Is the performance problem limited to writes or reads, or does it affect both?

For the heck of it I timed the following for comparison:

a) max_int+1 writes to an int array
b) max_int+1 writes to an int ref
c) max_int+1 reads from the array

writing to the array:

real    0m2.094s
user    0m1.983s
sys     0m0.015s
real    0m2.063s
user    0m2.077s
sys     0m0.000s
real    0m2.062s
user    0m1.999s
sys     0m0.000s

writing to the ref:

real    0m2.099s
user    0m1.999s
sys     0m0.000s
real    0m2.085s
user    0m2.109s
sys     0m0.000s
real    0m2.075s
user    0m1.999s
sys     0m0.015s

reading from an array

real    0m2.137s
user    0m1.968s
sys     0m0.015s
real    0m2.078s
user    0m1.983s
sys     0m0.015s
real    0m2.076s
user    0m2.077s
sys     0m0.015s

reading from a reference

real    0m2.097s
user    0m2.093s
sys     0m0.015s
real    0m2.080s
user    0m1.983s
sys     0m0.015s
real    0m2.082s
user    0m2.077s
sys     0m0.015s

(note that 2 seconds is about how long it takes a loop to count from 0 to max_int, so that the reading/writing basically took no resources)

It seems that, as long as you compile with -unsafe, arrays are just as fast as references. This makes sense because references are essentially one element arrays. But the drawback is that any programming in ocaml that requires very fast manipulation of referenced values or arrays is going to be slow. Because of the discussion in the following link, I think this is because of ocaml's garbage collector. Not that it's a bad garbage collector! It's just not suited for tasks like problem 31. Fast Multiplication.

http://groups.google.com/group/comp.lang.functional/browse_thread/thread/d71644093332cf87/313e6ab8393a64f52

multiplication code
num implementation

open Num
let multiply n =
  for i=1 to n do
    Scanf.scanf
      "\n%s %s"
      (fun num1 num2 ->
	let num1 = num_of_string num1 in
	let num2 = num_of_string num2 in
	print_endline
	  (string_of_num
	     (mult_num num1 num2))   
      )
  done;;
Scanf.scanf "%i" (multiply);
exit 0;

array implementation

let a = Array.create 10000 0;;
let b = Array.create 10000 0;;
let res = Array.create 20000 0;;
let decompstr array str strlen len =
  match array with
    'a' ->
      if strlen<len then
	for i=strlen to len do a.(i) <- 0 done;
      for i=0 to strlen-1 do
	a.(i) <- (int_of_char str.[strlen-i-1]) - 48
      done;
  | 'b' ->
      if strlen<len then
	for i=strlen to len do b.(i) <- 0 done;
      for i=0 to strlen-1 do
	b.(i) <- (int_of_char str.[strlen-i-1]) - 48
      done;;
let mularray len =
  for i=0 to 2*len-1 do res.(i) <- 0 done;
  let remainder = ref 0 in
  for i=0 to len - 1 do
    for j=0 to len - 1 do
      let result = a.(i) * b.(j) + !remainder + res.(i+j) in
      remainder := result/10;
      res.(i+j) <- result - !remainder*10;
    done;
    res.(i+len) <- !remainder;
    remainder := 0;
  done;;
let recompstr len =
  let beg = ref (-1) in
  let pos = ref (len-1) in
  while(!beg<0) do
    if !pos <= 0 then
      (
       beg := 0;
      )
    else
      if res.(!pos) <> 0 then
	beg := !pos
      else
	decr pos;
  done;
  for i = 0 to !pos do
    print_char (char_of_int (res.(!pos-i)+48))
  done;;
let multiply n =
  for i=1 to n do
    Scanf.scanf
      "\n%s %s"
      (fun num1 num2 ->
	let len1 = String.length num1 in
	let len2 = String.length num2 in
	let max =
	  if len1>len2 then len1 else len2 in
	decompstr 'a' num1 len1 max;
	decompstr 'b' num2 len2 max;
	mularray max;
	recompstr (2*max);
	print_endline "";
	flush stdout;
      )
  done;;
Scanf.scanf "%i" (multiply);
exit 0;

bigarray implementation

open Bigarray;;
open Bigarray.Array1;;
let a = create int c_layout 10000;;
let b = create int c_layout 10000;;
let res = create int c_layout 20000;;
let decompstr array str strlen len =
  match array with
    'a' ->
      if strlen<len then
	for i=strlen to len do a.{i} <- 0 done;
      for i=0 to strlen-1 do
	a.{i} <- (int_of_char str.[strlen-i-1]) - 48
      done;
  | 'b' ->
      if strlen<len then
	for i=strlen to len do b.{i} <- 0 done;
      for i=0 to strlen-1 do
	b.{i} <- (int_of_char str.[strlen-i-1]) - 48
      done;;
let mularray len =
  for i=0 to 2*len-1 do res.{i} <- 0 done;
  let remainder = ref 0 in
  for i=0 to len - 1 do
    for j=0 to len - 1 do
      let result = a.{i} * b.{j} + !remainder + res.{i+j} in
      remainder := result/10;
      res.{i+j} <- result - !remainder*10;
    done;
    res.{i+len} <- !remainder;
    remainder := 0;
  done;;
let recompstr len =
  let beg = ref (-1) in
  let pos = ref (len-1) in
  while(!beg<0) do
    if !pos <= 0 then
      (
       beg := 0;
      )
    else
      if res.{ !pos } <> 0 then
	beg := !pos
      else
	decr pos;
  done;
  for i = 0 to !pos do
    print_char (char_of_int (res.{ !pos-i } + 48) )
  done;;
let multiply n =
  for i=1 to n do
    Scanf.scanf
      "\n%s %s"
      (fun num1 num2 ->
	let len1 = String.length num1 in
	let len2 = String.length num2 in
	let max =
	  if len1>len2 then len1 else len2 in
	decompstr 'a' num1 len1 max;
	decompstr 'b' num2 len2 max;
	mularray max;
	recompstr (2*max);
	print_endline "";
	flush stdout;
      )
  done;;
Scanf.scanf "%i" (multiply);
exit 0;

testing array write speed

let a = [|0|]
for i=0 to max_int do
  a.(0) <- 3;
done

testing ref write speed

let temp = ref 0;;
for i=0 to max_int do
  temp := 3
done

reading from the array

let a = [|0|];;
for i=0 to max_int do
  ignore (a.(0))
done

reading from the reference

let a := 0;;
for i=0 to max_int do
  ignore (!a)
done
  • created

    Dec '06
  • last reply

    Dec '06
  • 2

    replies

  • 555

    views

  • 2

    users

  • 1

    link

There is a more efficient algorithm (search these forums for more information), and in order to have VFMUL accepted, you will probably need to implement such an algorithm efficiently in a fast language. Start with TMUL and MUL if you haven't already smile

Unfortunately, my attempts to solve VFMUL in ocaml have led me to believe that it can't be done; not because of the algorithm, but because of ocaml's garbage collector. It simply causes programs that do lots of manipulations on array values to run too slowly. It's not that caml's garbage collector is bad overall, just in this case it's not optimal.

The built-in Num library's multiplication on large numbers takes 4.5 seconds on VFMUL's input. On the same input, an O(n^2) implementation in arrays took >30 seconds.