Révision | 60c48182945a24d2f95e9bdff42cdabd01ad6010 |
---|---|
Taille | 3,629 octets |
l'heure | 2010-10-13 07:23:50 |
Auteur | lorenzo |
Message de Log | Apart from some minor modifications, I am now properly iterating a function
|
import Data.Ord
import Data.List
-- | Program entry point: compute one entropy estimate per input file,
-- write the estimates to "entropy.dat" (one per line), then print a
-- short status message.
main :: IO ()
main = do
  -- To take the list of file indices from a file instead of the hard-coded
  -- range below, restore these lines:
  --   txt <- readFile "flist.dat"
  --   let nums :: [Int]
  --       nums = concat $ convert txt -- every element of `convert txt` is a row of the original data table
  let fileIndices = [1 .. 2]
  -- Earlier variant, kept for reference. According to the original author's
  -- note it computes the entropies correctly but runs into trouble with too
  -- many open file handles, which is why the per-file pipeline below is used:
  --   cl <- file_conv_all nums
  --   let entr_all = map entropy_list cl
  --   save_vector "entropy.dat" entr_all
  get_all_entropies fileIndices >>= save_vector "entropy.dat"
  putStrLn "So far so good "
-- #########################################################
-- | Name of the data file with index @i@: the text "file", the decimal
-- rendering of @i@, and the extension ".dat".
filename :: Int -> FilePath
filename i = concat ["file", show i, ".dat"]
-- | Count the characters in @file@.
--
-- 'readFile' reads lazily: the handle stays open until the contents have
-- been consumed to the end. The length is therefore forced with '$!' before
-- the action returns, so the file has been read completely by the time the
-- caller continues. Without this, running the action over many files with
-- 'mapM' would open every file before reading any of them.
fileLength :: FilePath -> IO Int
fileLength file = do
  contents <- readFile file
  return $! length contents
-- | Run 'fileLength' on the file named by each index (via 'filename') and
-- collect the lengths in the same order as the indices.
getAllLengths :: [Int] -> IO [Int]
getAllLengths = mapM (\i -> fileLength (filename i))
-- #########################################################
-- The three functions above are examples by Daniel for calculating the lengths of multiple files.
-- They serve as a template: the whole point is to replace fileLength with my own function
-- and getAllLengths with the iteration of my own function over the list of files.
-- | Compute the entropy estimate ('entropy_list') of the lines of @file@.
--
-- This follows the same pattern as 'fileLength', except that the contents
-- are turned into a list of lines with 'conv_to_list' rather than used as a
-- raw string (the data are not purely numeric; they also contain letters).
entropy_on_file :: Floating b => FilePath -> IO b
entropy_on_file file = do
  rows <- conv_to_list file
  -- 'conv_to_list' is built on the lazy 'readFile', so the handle stays open
  -- until the contents are consumed. Forcing the estimate here makes each
  -- file get read before the next one is opened when this action is iterated
  -- over many files, instead of accumulating open handles.
  return $! entropy_list rows
-- | Iterate 'entropy_on_file' over the file list: for every index in @nums@
-- the file name is built with 'filename' and its entropy is computed.
-- This mirrors 'getAllLengths', with 'entropy_on_file' in place of
-- 'fileLength'.
get_all_entropies nums = mapM entropyForIndex nums
  where
    entropyForIndex i = entropy_on_file (filename i)
-- | Read @file@ and split its contents into lines.
conv_to_list file = do
  contents <- readFile file
  return (lines contents)
-- | Load every indexed file as a list of lines: each index in @nums@ is
-- turned into a file name with 'filename' and read with 'conv_to_list'.
file_conv_all nums = traverse (\i -> conv_to_list (filename i)) nums
-- | True when @sublist@ occurs as a contiguous run somewhere in @list@,
-- i.e. when it is a prefix of one of the suffixes of @list@.
is_sublist sublist list = any (sublist `isPrefixOf`) (tails list)
-- | The "future" window: at most @j@ elements of @list@, starting at the
-- 1-based position @i@.
gen_fut_list list i j = take j fromI
  where
    fromI = drop (i - 1) list -- everything from position i onwards
-- | The "past": the first @i - 1@ elements of @list@, i.e. everything
-- before the 1-based position @i@.
gen_past_list list i = fst (splitAt (i - 1) list)
-- | Does the future window of length @j@ starting at position @i@
-- ('gen_fut_list') already occur somewhere in the past of position @i@
-- ('gen_past_list')?
find_in_list list i j = future `is_sublist` past
  where
    future = gen_fut_list list i j
    past = gen_past_list list i
-- | For position @i@, evaluate 'find_in_list' for every window length from
-- 1 up to the number of elements remaining from @i@ to the end of @list@.
iter_find list i = [find_in_list list i j | j <- [1 .. remaining]]
  where
    remaining = length list - i + 1
-- | The leading run of 'True' results of 'iter_find'; evaluation stops at
-- the first window length that is not found in the past.
iter_find_efficient list i = takeWhile id matches
  where
    matches = iter_find list i
-- | One more than the number of leading window lengths at position @i@ that
-- were found in the past ('iter_find_efficient').
--
-- The extra 1 is needed because the quantity wanted is the length of the
-- shortest future list which has not occurred in the past.
count_string_length list i = matched + 1
  where
    matched = length (iter_find_efficient list i)
-- | Sum of 'count_string_length' over every position @1 .. length list@.
sum_string_lengths list = sum . map (count_string_length list) $ positions
  where
    positions = [1 .. length list]
-- | The individual 'count_string_length' values, one per position
-- @1 .. length list@, in order.
list_string_lengths list = [count_string_length list i | i <- [1 .. length list]]
-- | Prefactor @1 / (n * log n / log 2)@, where @n@ is the number of elements
-- of @list@.
pref_list list = 1.0 / scaled
  where
    size = fromIntegral (length list)
    scaled = size * log size / log 2.0
-- | Entropy estimate of @list@: the reciprocal of the prefactor
-- ('pref_list') multiplied by the summed string lengths
-- ('sum_string_lengths').
entropy_list list = 1.0 / (prefactor * total)
  where
    prefactor = pref_list list
    total = fromIntegral (sum_string_lengths list)
-- | Flatten a collection of lists and write every element to the file at
-- @path@, one 'show'n element per line.
save_vector_flat path list = writeFile path (unlines (concatMap (map show) list))
-- | Write the 'show' rendering of a value to the file at @path@.
save path = writeFile path . show
-- | Write a list to the file at @path@, one 'show'n element per line.
save_vector path = writeFile path . unlines . map show
-- | Parse a whitespace-separated table: each line of @x@ becomes one row,
-- and each word on that line is parsed with 'read'.
convert x = [map read (words row) | row <- lines x]