#!/usr/bin/env python
"""Experiments in splitting text into fragments on a list of separators.

The script first tries the splitting helpers on a couple of in-line sample
strings, then fragments a saved web page and prints the resulting
superposition.  A final similarity comparison between two pages is kept at
the bottom but is currently unreachable (see the ``sys.exit(0)`` before it).
"""

import sys

from the_semantic_db_code import *
from the_semantic_db_functions import *
from the_semantic_db_processor import *

C = context_list("fragment documents")

frag_sequences = ["<", "|", ">"]
s = "some text|you know<"


# ahh.. the Bortic method might be the way to go:
# split[">"]:split["|"]:split["<"]:list
#
def split_list(fragment, list):
    """Split every string in ``list`` on ``fragment`` and flatten the pieces.

    Args:
        fragment: separator string handed to ``str.split``.
        list: iterable of strings to split.  (The name shadows the builtin;
            it is kept unchanged so existing callers keep working.)

    Returns:
        A single flat list holding the pieces of every input string, in order.
    """
    return [piece for item in list for piece in item.split(fragment)]


result = split_list("|", split_list(">", split_list("<", [s])))
print("result:", result)


# OK. Now, let's do the full thing then.
# The Bortic method is ugly in python, unfortunately.
def first_fragment_string(s, fragments):
    """Split ``s`` on each separator in ``fragments`` in turn, via split_list.

    Args:
        s: the string to fragment.
        fragments: iterable of separator strings, applied in order.

    Returns:
        Flat list of the pieces left after splitting on every separator.
    """
    r = [s]
    for frag in fragments:
        r = split_list(frag, r)
    return r


result = first_fragment_string(s, frag_sequences)
print("result:", result)


def fragment_string(s, fragments):
    """Split ``s`` on each separator in ``fragments`` in turn (self-contained).

    Same result as ``first_fragment_string`` but without the helper call.

    Args:
        s: the string to fragment.
        fragments: iterable of separator strings, applied in order.

    Returns:
        Flat list of the pieces left after splitting on every separator.
        Empty strings produced by adjacent or trailing separators are kept.
    """
    pieces = [s]
    for frag in fragments:
        # Re-split every piece produced so far on the next separator.
        pieces = [part for piece in pieces for part in piece.split(frag)]
    return pieces


result = fragment_string(s, frag_sequences)
print("result:", result)

fragments = ["AGCTAA", "ACGACG", "TATATA"]
s = "AAGCTAGCTAAGGTACGACGAAGCT"
result = fragment_string(s, fragments)
print("s :", s)
print("result:", result)

#sys.exit(0)
#############################################
print()
print()

file_table = {
    "eztv-1": "web-pages/eztv-1.html",
    "eztv-2": "web-pages/eztv-2.html",
    "diary-1": "web-pages/k5-diary-1.html",
    "diary-2": "web-pages/k5-diary-2.html",
    "wc-comments-1": "web-pages/wc-comments-1.html",
}

fragments = ["<", "|", ">"]


def load_fragments(filename, fragments):
    """Read ``filename`` and accumulate its non-empty fragments.

    The file text is split with ``fragment_string``; each piece is stripped
    of surrounding whitespace and, if anything is left, added to the result
    as ``ket(piece)``.

    Args:
        filename: path of the text file to read.
        fragments: iterable of separator strings to split the text on.

    Returns:
        The object created by ``superposition()`` after every non-empty
        fragment has been added to it with ``+=``.
    """
    result = superposition()
    # Only the read needs the file handle; close it before processing.
    with open(filename, 'r') as f:
        text = f.read()
    for sequence in fragment_string(text, fragments):
        sequence = sequence.strip()
        # Earlier experiment, kept for reference: add the fragment length
        # instead of the fragment text, i.e.
        #     result += ket(str(len(sequence)))
        if sequence:
            result += ket(sequence)
    return result


# Only the last assignment takes effect; the first is kept so the input
# page can be switched by reordering these two lines.
file = "web-pages/wc-comments-1.html"
file = "web-pages/k5-diary-1.html"
result = load_fragments(file, fragments).drop_below(0).coeff_sort().long_display()
print("result:")
print(result)
print()

sys.exit(0)

# NOTE: everything below is unreachable while the sys.exit(0) above is in
# place.  It compares the fragment superpositions of two pages.
file1 = "web-pages/k5-diary-1.html"
file2 = "web-pages/k5-diary-2.html"
file1 = "web-pages/eztv-1.html"
file2 = "web-pages/eztv-2.html"
r1 = load_fragments(file1, fragments)
r2 = load_fragments(file2, fragments)
result = silent_simm(r1, r2)
print("result:", result * 100)