#!/opt/python3/bin/python3
"""Collect the movies of well-known actors into one sorted superposition.

For every actor listed in the top-actors file, the script looks up the line
``movies |actor: NAME> => ...`` in the IMDB sw file, post-processes the
result with ``collapse()`` and ``apply_sigmoid(clean)``, and adds it to a
running superposition.  The coefficient-sorted total is written to
``star-studded-movies.txt``.

NOTE(review): presumably ``clean`` sets every coefficient to 1, which would
make the final coefficient the number of listed actors per movie -- confirm
against the_semantic_db_functions.
"""

import sys

from the_semantic_db_code import *
from the_semantic_db_functions import *
from the_semantic_db_processor import *

C = context_list("star studded movies")


def file_recall(filename, op, label):
    """Return the right-hand side of the first ``op |label> => `` line.

    The file is scanned line by line, so nothing but the matching line has
    to be parsed.

    Args:
        filename: path of the text file to scan.
        op: operator name that the wanted line starts with.
        label: either a ket, whose ``label`` selects the line and whose
            ``value`` scales the result, or a plain string label (scale 1).

    Returns:
        The first element of ``extract_literal_superposition`` applied to
        the text after the prefix, multiplied by the coefficient, or
        ``ket("", 0)`` when no line starts with the prefix.
    """
    if isinstance(label, ket):
        coeff = label.value
        ket_label = label.label
    else:
        coeff = 1
        ket_label = label

    pattern = op + " |" + ket_label + "> => "
    with open(filename, 'r') as f:
        for line in f:
            if line.startswith(pattern):
                parsed = extract_literal_superposition(line[len(pattern):])[0]
                return parsed.multiply(coeff)
    return ket("", 0)


def report_actor_ratings(one):
    """Print ratings, votes and average movie ratings for a single actor.

    This code used to sit, unreachable, after the script's ``sys.exit(0)``
    and read a global ``one`` whose assignment (``sys.argv[1]``) was
    commented out.  It is kept here as a function so it can be re-enabled
    by calling it; nothing in this script calls it.

    Args:
        one: actor name without the ``"actor: "`` prefix.
    """
    imdb_sw = "sw-examples/imdb.sw"
    # Author's notes on the data files:
    #   sw-examples/imdb-ratings.sw only has movies with 10k or more votes;
    #   complete-imdb-ratings.sw is the full set;
    #   imdb-ratings-self-only.sw is the full set restricted to
    #   imdb-rating-self entries, to speed it up by roughly 4 (author's
    #   estimate).
    votes_sw = "sw-examples/complete-imdb-ratings.sw"
    ratings_sw = "sw-examples/imdb-ratings-self-only.sw"

    actor = "actor: " + one
    raw_movies = file_recall(imdb_sw, "movies", actor).collapse().apply_sigmoid(clean)

    movie_ratings = superposition()
    movie_votes = superposition()
    for x in raw_movies.data:
        movie_ratings += file_recall(ratings_sw, "imdb-rating-self", x)
        movie_votes += file_recall(votes_sw, "imdb-votes-self", x)

    movie_ratings = movie_ratings.coeff_sort()
    movie_votes = movie_votes.coeff_sort()

    # find the average movie rating for this actor:
    count = movie_ratings.count()
    count_sum = movie_ratings.count_sum()
    average = 0
    if count > 0:
        average = count_sum / count

    # find the weighted average movie rating for this actor
    # (author's note: first use of multiplying superpositions together):
    weighted_votes = multiply(movie_votes, movie_ratings)
    weighted_count = movie_votes.count_sum()
    weighted_count_sum = weighted_votes.count_sum()
    weighted_average = 0
    if weighted_count > 0:
        weighted_average = weighted_count_sum / weighted_count

    print("============================")
    print("actor:", one)
    print("number of movies:", len(movie_ratings))
    print("ratings:")
    print(movie_ratings.long_display())
    print("votes:")
    print(movie_votes.long_display())
    print("average movie rating:", "%.2f" % average)
    print("weighted average move rating:", "%.2f" % weighted_average)


top_actors = "top-2500-well-known-actors.txt"
# "top-10-well-known-actors.txt" was used as a small test input.
imdb_sw = "sw-examples/imdb-sans-actors.sw"

star_studded_movies = superposition()
with open(top_actors, 'r') as f:
    for line in f:
        try:
            coeff, actor = line.split('\t')
        except ValueError:
            # Not exactly two tab-separated fields: skip the line.
            continue
        actor = "actor: " + actor.rstrip()
        try:
            star_studded_movies += file_recall(imdb_sw, "movies", actor).collapse().apply_sigmoid(clean)
        except Exception as err:
            # Stay best-effort per actor, but do not swallow
            # KeyboardInterrupt/SystemExit and do not hide the failure.
            print("skipping", actor, "-", err, file=sys.stderr)
            continue

print("=========================")

dest = "star-studded-movies.txt"
with open(dest, 'w') as f:
    f.write(star_studded_movies.coeff_sort().long_display())

# To report on one actor instead, call report_actor_ratings(sys.argv[1]).
sys.exit(0)