I am trying to load an ASCII data file in which each field is either a string or a float. Strings are always formatted as ~foo~, while floats may appear without any surrounding symbol. Fields are separated by the ^ character. For example: ~203~^~g~^~PROCNT~^~Protein~^~2~^~600~
I have used the following code to extract each field of each line easily, but it seems a little slow, so I was wondering whether there is a better or simpler way.
#light
open System
open System.IO
open System.Collections.Generic
/// Lazily reads the text file `filename` and yields its lines one at a time,
/// rewritten so that every missing field (nothing between two `^`
/// separators, or nothing after a trailing `^`) is filled in with "0.0".
/// Empty lines are yielded unchanged.
/// The underlying stream is disposed when enumeration of the sequence ends.
let reader filename =
    seq {
        use stream = new StreamReader(File.OpenRead(filename))
        while not stream.EndOfStream do
            // Replace twice: one pass only rewrites non-overlapping "^^" pairs,
            // so "^^^^" becomes "^0.0^^0.0^" and needs a second pass to reach
            // "^0.0^0.0^0.0^". This lets the later split account for the
            // missing values.
            let line = stream.ReadLine().Replace("^^", "^0.0^").Replace("^^", "^0.0^")
            // Check the length first: an empty line has no last character and
            // indexing it would raise an exception.
            let endsWithSeparator = line.Length > 0 && line.[line.Length - 1] = '^'
            yield (if endsWithSeparator then line + "0.0" else line)
    }
/// Removes the surrounding tildes from a `~quoted~` field.
/// A field is unwrapped only when it both starts and ends with `~`; any other
/// input (bare numbers, a lone "~", or a malformed "~foo") is returned
/// unchanged so that no real character is ever dropped.
/// "~~" yields the empty string.
let extract (s : string) =
    let isQuoted = s.Length >= 2 && s.[0] = '~' && s.[s.Length - 1] = '~'
    if isQuoted then s.[1 .. s.Length - 2] else s
/// Converts a field to a float, mapping the empty string to 0.0 so that an
/// empty quoted field (`~~`, once unwrapped) is read as zero.
let to_float (s : string) =
    match s.Length with
    | 0 -> 0.0
    | _ -> float s
/// Splits `line` on the `^` separator, unwraps each field with `extract`, and
/// returns the fields as an array so a caller can index them by position
/// (line.[# of field]).
let line_to_strings line =
    let fields = String.split ['^'] line
    let unwrapped = List.map extract fields
    Array.of_list unwrapped
// Base directory that data file names are resolved against.
let path = "C:\\"
// Joins the file name `f` onto `path`.
let file f = f |> Filename.concat path
// Script entry point: reads NUTR_DEF.txt from `path`, splits every line into
// its fields, and prints each field array together with its length.
let _ =
    reader (file "NUTR_DEF.txt")
    |> Seq.map line_to_strings
    // The lambda needs its own closing parenthesis; without it the expression
    // does not parse.
    |> Seq.iter (fun x -> print_endline (any_to_string (Array.length x, x)))
Thanks for your help