#!/usr/bin/env python

import sys

from the_semantic_db_code import *
from the_semantic_db_functions import *
from the_semantic_db_processor import *

# Context object that receives the "letter-count" rules learned below and is
# later dumped to stdout and saved to an .sw file.
# NOTE(review): the string argument is presumably the context's name -- confirm
# against context_list in the_semantic_db_code.
C = context_list("play with letter counts")

# Topic label -> path of the ebook text file whose letters get counted.
# The entries are processed in the order they are listed here.
file_table = {
  "Alice-in-Wonderland": "ebooks/ebook-Alices_Adventures_in_Wonderland_11.txt",
  "I-Robot": "ebooks/ebook-Asimov_Isaac_-_I_Robot.txt",
  "Frankenstein": "ebooks/ebook-Frankenstein_84.txt",
  "Moby-Dick": "ebooks/ebook-Moby_Dick_2701.txt",
  "Shakespeare": "ebooks/ebook-moby-shakespeare.txt",
  "Sherlock-Holmes": "ebooks/ebook-Sherlock-Holmes.txt",
  "Tom-Sawyer": "ebooks/ebook-Tom_Sawyer_74.txt",
  "Gone-with-Wind": "ebooks/ebook-Gone-with-the-wind--0200161.txt",
  "nineteen-eighty-four": "ebooks/ebook-nineteen-eighty-four--0100021.txt",
}

def dict_to_sp(dict):
  """Build a superposition holding one ket per entry of a dictionary.

  For every (key, value) pair a ``ket(key, value)`` is appended to the
  ``data`` attribute of a fresh ``superposition``, in the dictionary's
  iteration order.

  Args:
    dict: mapping of label -> value.  The name shadows the builtin but is
      kept because it is part of the function's signature.

  Returns:
    The newly created superposition.
  """
  sp = superposition()
  for label, value in dict.items():
    sp.data.append(ket(label, value))
  return sp


def file_to_sp(filename):
  """Count the lowercase ASCII letters of a text file.

  Only the characters 'a'..'z' are tallied; uppercase letters, digits,
  punctuation, whitespace and non-ASCII characters are skipped (no case
  folding is done).

  Args:
    filename: path of the text file to read.

  Returns:
    ``dict_to_sp(counts).ket_sort()``, where ``counts`` maps each letter
    that occurs in the file to its number of occurrences.

  Raises:
    OSError: if the file cannot be opened or read.
  """
  # Function-scope import so the block does not depend on the file header.
  from collections import Counter

  letters = 'abcdefghijklmnopqrstuvwxyz'
  counts = Counter()
  # Only ASCII letters are counted, so decode as ASCII and drop everything
  # else.  This gives the same counts as before for files that decoded
  # cleanly, but no longer raises UnicodeDecodeError when an ebook contains
  # bytes that are invalid in the platform's default encoding.
  with open(filename, 'r', encoding='ascii', errors='ignore') as f:
    for line in f:
      counts.update(c for c in line if c in letters)
  return dict_to_sp(counts).ket_sort()

# quick test:
#print(file_to_sp("fish.txt"))

# Learn a "letter-count" rule for every book in file_table, reporting each
# topic and file as it is processed.
for topic, path in file_table.items():
  print("topic: " + topic)
  print("file:  " + path)

  letter_counts = file_to_sp(path)
  C.learn("letter-count", topic, letter_counts)


# Show everything that was learned, then save the context to an .sw file.
print(C.dump_universe())

destination = "sw-examples/ebook-letter-counts.sw"
save_sw(C, destination)