#!/opt/python3/bin/python3

import sys

from the_semantic_db_code import *
from the_semantic_db_functions import *
from the_semantic_db_processor import *

# Context handle for this script; it is not referenced again in the visible code.
C = context_list("average movie rating")

# The actor name is the single required command-line argument. Exit with a
# usage message rather than an IndexError traceback when it is missing.
if len(sys.argv) < 2:
  sys.exit("usage: " + sys.argv[0] + " <actor name>")
one = sys.argv[1]

def file_recall(filename,op,label):
  """Look up a single learn rule directly from an sw file on disk.

  Scans `filename` line by line for the first line that starts with
  `<op> |<label>> => ` and parses the remainder of that line with
  extract_literal_superposition.

  filename -- path of the sw file to scan.
  op       -- operator name that begins the rule.
  label    -- either a ket (its label selects the rule and its value scales
              the result) or a plain string (scale factor 1).

  Returns element [0] of the parsed result multiplied by the scale factor,
  or ket("",0) when no line in the file matches.
  """
  if type(label) == ket:
    ket_label, coeff = label.label, label.value
  else:
    ket_label, coeff = label, 1

  prefix = "".join([op, " |", ket_label, "> => "])

  # only the first matching line is used; later duplicates are ignored
  with open(filename,'r') as sw_file:
    hits = (row for row in sw_file if row.startswith(prefix))
    first_hit = next(hits, None)

  if first_hit is None:
    return ket("",0)

  # everything after the prefix is the right-hand side of the rule
  rule_body = first_hit[len(prefix):]
  return extract_literal_superposition(rule_body)[0].multiply(coeff)


# maps actors to their movies (queried below with the "movies" operator):
imdb_sw = "sw-examples/imdb.sw"

# Alternative ratings file that only has movies with 10k or more votes
# (currently unused):
#   sw-examples/imdb-ratings.sw

# this is the full set:
votes_sw = "sw-examples/complete-imdb-ratings.sw"

# this is the full set, but only imdb-rating-self entries, which the original
# author estimated (unmeasured) makes lookups roughly 4 times faster.
ratings_sw = "sw-examples/imdb-ratings-self-only.sw"


# look up the movies recorded for the actor named on the command line.
# NOTE(review): collapse() and apply_sigmoid(clean) come from the semantic db
# modules; presumably they merge duplicates and reset coefficients -- confirm.
actor = "actor: " + one
raw_movies = file_recall(imdb_sw,"movies",actor).collapse().apply_sigmoid(clean)

# collect the rating and the vote count of every movie. Each file_recall call
# rescans its file from the start, and a movie that is missing from a file
# contributes the ket("",0) that file_recall returns on a miss.
movie_ratings = superposition()
movie_votes = superposition()
for x in raw_movies.data:
  movie_ratings += file_recall(ratings_sw,"imdb-rating-self",x)
  movie_votes += file_recall(votes_sw,"imdb-votes-self",x)

# presumably orders the entries by coefficient for display -- confirm against coeff_sort.
movie_ratings = movie_ratings.coeff_sort()
movie_votes = movie_votes.coeff_sort()

# find the average movie rating for this actor:
count = movie_ratings.count()
count_sum = movie_ratings.count_sum()
average = 0
if count > 0:
  average = count_sum / count

# find the weighted average movie rating for this actor, i.e.
# sum(votes * rating) / sum(votes), guarding against a zero vote total.
# NOTE(review): assumes multiply() pairs up entries with the same movie label -- confirm.
weighted_votes = multiply(movie_votes,movie_ratings) # I think this is the first use of multiplying superpositions together.
weighted_count = movie_votes.count_sum()
weighted_count_sum = weighted_votes.count_sum()
weighted_average = 0
if weighted_count > 0:
  weighted_average = weighted_count_sum / weighted_count


# report the results:
print("============================")
print("actor:",one)
print("number of movies:",len(movie_ratings))
print("ratings:")
print(movie_ratings.long_display())
print("votes:")
print(movie_votes.long_display())
print("average movie rating:","%.2f" % average)
print("weighted average movie rating:","%.2f" % weighted_average)