#!/usr/bin/env python
import sys
from the_semantic_db_code import *
from the_semantic_db_functions import *
from the_semantic_db_processor import *

C = context_list("find common movies")


# similar to context.recall(op,label), but this works with files instead of all data in memory.
# Motivated by wanting to run the Kevin Bacon game on IMDB, but EC2 is too expensive to do all in mem.
# So hopefully I can do it using this approach (I already have a copy of IMDB in sw format - took about a week on EC2)
def file_recall(filename, op, label):
    """Recall a single learn rule straight from an sw file on disk.

    The file is scanned line by line and the first line that starts with
    ``op |label> => `` is parsed; nothing else is kept in memory.

    Args:
        filename: path of the sw data/source file.
        op: the operator label, a string.
        label: the ket label, either a string or a ket. For a ket, its
            ``label`` is used for matching and its ``value`` scales the result.

    Returns:
        The first element of ``extract_literal_superposition`` applied to the
        right-hand side of the matching line, multiplied by the coefficient
        (a superposition, per the original author's notes). If no line
        matches, ``ket("",0)``.
    """
    # isinstance rather than an exact type comparison, so ket subclasses
    # are treated as kets too.
    if isinstance(label, ket):
        coeff = label.value
        ket_label = label.label
    else:
        coeff = 1
        ket_label = label

    pattern = op + " |" + ket_label + "> => "
    n = len(pattern)
    print("pattern:", pattern)
    print("n: ", n)

    with open(filename, 'r') as f:
        for line in f:
            if line.startswith(pattern):
                # print("line:",line)   # pointless noise. els() spits it out anyway!
                #                       # At least the current debugging version does.
                # Everything after the pattern is the literal superposition.
                return extract_literal_superposition(line[n:])[0].multiply(coeff)
    # No rule for this operator/label pair in the file.
    return ket("", 0)


imdb_sw = "sw-examples/just-movies-imdb.sw"    # our imdb data
actor1 = "actor: Kevin (I) Bacon"
#actor2 = "actor: Tilda Swinton"
actor2 = "actor: Veronica Webb"

movies1 = file_recall(imdb_sw, "movies", actor1)
movies2 = file_recall(imdb_sw, "movies", actor2)
common_movies = intersection(movies1, movies2)
print()
print("actor 1:", actor1)
print("actor 2:", actor2)
print()
print("number of movies:", len(movies1))
print("movies 1:", movies1)
print()
print("number of movies:", len(movies2))
print("movies 2:", movies2)
print()
print("number of movies:", len(common_movies))
print("common movies:", common_movies)


def find_common_movies(sw_file, one, two):
    """Print the movies two actors have in common, according to an sw file.

    Args:
        sw_file: path of the sw file holding the ``movies`` rules.
        one: first actor's name, without the ``actor: `` prefix.
        two: second actor's name, without the ``actor: `` prefix.

    Returns:
        None. The result is printed, not returned.
    """
    actor1 = "actor: " + one
    actor2 = "actor: " + two
    movies1 = file_recall(sw_file, "movies", actor1)
    movies2 = file_recall(sw_file, "movies", actor2)
    common_movies = intersection(movies1, movies2)
    print("common movies for:")
    print(one)
    print(two)
    print("number of common movies:", len(common_movies))
    print("common movies:", common_movies)
    print()


print("####################################################")
find_common_movies(imdb_sw, "Kevin (I) Bacon", "Tilda Swinton")
find_common_movies(imdb_sw, "Kevin (I) Bacon", "Veronica Webb")
find_common_movies(imdb_sw, "Kevin (I) Bacon", "Nick Nolte")
find_common_movies(imdb_sw, "Kevin (I) Bacon", "Winona Ryder")

# NOTE: the script stops here, so nothing below this line is ever executed.
sys.exit(0)

# quick test:
#sw_file = "sw-examples/fred-sam-friends.sw"
#r = file_recall(sw_file,"friends","Sam")
#r = file_recall(sw_file,"friends","Fred")
#print("r:",r)
#print("-----------------------------------------------------------------")

# a single layer of the Kevin Bacon game.
# one is a superposition (though should handle kets too)
# returns a superposition.
#
# the code does:
#   actors movies one-superposition
# eg:
#   actors movies |actor: Kevin Bacon>
#
def Kevin_Bacon_game(bacon_file, one):
    """Run one layer of the Kevin Bacon game against an sw file on disk.

    Computes ``actors movies one``: every ket in ``one`` is mapped to its
    ``movies`` rule, then every resulting ket is mapped to its ``actors``
    rule, with both lookups done through ``file_recall``.

    Args:
        bacon_file: path of the sw file holding the ``movies`` and
            ``actors`` rules.
        one: the starting point; a superposition, a ket, or a plain string
            ket label.

    Returns:
        The accumulated ``actors`` superposition after ``coeff_sort()``.
    """
    # Make sure we have a superposition, even if fed a string or a ket:
    # string -> ket -> superposition.
    if isinstance(one, str):
        one = ket(one)
    if isinstance(one, ket):
        one = superposition() + one

#    one = one.apply_sigmoid(clean)    # optional to clean coeffs from our incoming sp

    # NOTE: every file_recall call scans the sw file from the top, so each
    # layer costs one file pass per ket.
    sp1 = superposition()
    for x in one.data:
        sp1 += file_recall(bacon_file, "movies", x)

    sp2 = superposition()
    for x in sp1.data:
        sp2 += file_recall(bacon_file, "actors", x)
    print("len:", len(sp2))
    return sp2.coeff_sort()


# this is the full game we are trying to replicate:
# kevin-bacon-0 |result> => actors movies |actor: Kevin Bacon>                                           -- set of actors that share a movie with Kevin.
# kevin-bacon-1 |result> => actors movies actors movies |actor: Kevin Bacon>                             -- set of actors one step removed from Kevin.
# kevin-bacon-2 |result> => actors movies actors movies actors movies |actor: Kevin Bacon>               -- set of actors two steps removed.
# kevin-bacon-3 |result> => actors movies actors movies actors movies actors movies |actor: Kevin Bacon> -- three steps removed
# ...

# still stuck in the keep it all in memory method! Bad doggie!
#sw_bacon_file = "sw-examples/just-movies-imdb.sw"   # our imdb data
#r = ket("actor: Kevin (I) Bacon")   # NB: we can choose any actor we like! We have the whole damn imdb to choose from!
#N = 4                               # How deep we want to go. For now 4, but maybe 10 or bigger later!
#for k in range(N):
#  r = Kevin_Bacon_game(sw_bacon_file,r)
#  C.learn("kevin-bacon-" + str(k),"result",r)
#
#
#name = "sw-examples/kevin-bacon.sw" # save the results.
#save_sw(C,name)

# let's write a version that writes to disk as it goes.
sw_bacon_file = "sw-examples/just-movies-imdb.sw"          # our imdb data
sw_dest_file = "sw-examples/fast-write--kevin-bacon.sw"    # where we are going to save the results

r = ket("actor: Kevin (I) Bacon")   # NB: we can choose any actor we like! We have the whole damn imdb to choose from!
N = 10                              # How deep we want to go. Started at 4; maybe bigger than 10 later.

# The with-block closes the file (and so flushes buffered output) even if
# one of the layers raises part-way through.
with open(sw_dest_file, 'w') as dest:
    # fake the context header:
    dest.write("----------------------------------------\n")
    dest.write("|context> => |context: Kevin Bacon game>\n\n")    # can't be bothered to fake the supported-ops line.

    for k in range(N):
        # Each layer is fed the previous layer's result.
        r = Kevin_Bacon_game(sw_bacon_file, r)
        # r.display(True) for exact dump, not str(sp) version.
        dest.write("kevin-bacon-" + str(k) + " |result> => " + r.display(True) + "\n")
        # Push each finished layer out of the buffer so it is not lost
        # if a later layer fails.
        dest.flush()

    dest.write("----------------------------------------\n")