#!/usr/bin/env python

import sys

from the_semantic_db_code import *
from the_semantic_db_functions import *
from the_semantic_db_processor import *

# Module-level context named "find common movies". In the code shown here it is
# only referenced from the commented-out in-memory version of the game (C.learn / save_sw).
C = context_list("find common movies")


# similar to context.recall(op,label), but this works with files instead of all data in memory.
# Motivated by wanting to run the Kevin Bacon game on IMDB, but EC2 is too expensive to do all in mem.
# So hopefully I can do it using this approach (I already have a copy of IMDB in sw format - took about a week on EC2)
#
# filename is sw data/source file
# op is the operator label, a string
# label is the ket label, a string or a ket
#
# returns a superposition
def file_recall(filename,op,label):
  """Look up one learn rule in an sw file without loading the file into memory.

  Scans `filename` line by line for the first line that starts with
  "op |label> => " and parses the text that follows that prefix.

  filename: path of the sw data/source file to scan.
  op: the operator label, a string.
  label: the ket label, either a string or a ket. For a ket, its .label
    is used for matching and its .value scales the result.

  Returns the parsed right-hand side multiplied by the coefficient, or
  ket("",0) if no line in the file matches.
  """
  # isinstance rather than an exact type() comparison, so ket subclasses are accepted too.
  if isinstance(label, ket):
    coeff, ket_label = label.value, label.label
  else:
    coeff, ket_label = 1, label

  pattern = op + " |" + ket_label + "> => "
  n = len(pattern)
  print("pattern:",pattern)
  print("n:      ",n)

  with open(filename,'r') as f:
    for line in f:
      if line.startswith(pattern):
        # Only the first element of the parser's return value is used;
        # presumably the rest describes the unparsed remainder -- TODO confirm.
        return extract_literal_superposition(line[n:])[0].multiply(coeff)
  return ket("",0)


imdb_sw = "sw-examples/just-movies-imdb.sw"    # our imdb data

# The two actors to compare. An alternative second actor is "actor: Tilda Swinton".
actor1 = "actor: Kevin (I) Bacon"
actor2 = "actor: Veronica Webb"

# Look up each actor's movies straight from the sw file, then intersect them.
movies1 = file_recall(imdb_sw,"movies",actor1)
movies2 = file_recall(imdb_sw,"movies",actor2)
common_movies = intersection(movies1,movies2)

print()
print("actor 1:",actor1)
print("actor 2:",actor2)

# Same three-line report (blank line, count, contents) for each result.
for _heading,_sp in (("movies 1:",movies1),("movies 2:",movies2),("common movies:",common_movies)):
  print()
  print("number of movies:",len(_sp))
  print(_heading,_sp)


def find_common_movies(sw_file,one,two):
  """Print the movies that two actors have in common.

  sw_file: path of the sw file to search.
  one, two: actor names without the "actor: " prefix; the prefix is added here.

  Looks up the "movies" rule for each actor with file_recall, intersects
  the two results and prints a short report. Returns nothing.
  """
  # Build both ket labels up front, then do the two file lookups in order.
  labels = ["actor: " + name for name in (one,two)]
  first,second = [file_recall(sw_file,"movies",lbl) for lbl in labels]
  shared = intersection(first,second)

  print("common movies for:")
  for name in (one,two):
    print(name)
  print("number of common movies:",len(shared))
  print("common movies:",shared)
  print()

print("####################################################")

# Repeat the comparison for Kevin Bacon against several other actors.
for _costar in ("Tilda Swinton","Veronica Webb","Nick Nolte","Winona Ryder"):
  find_common_movies(imdb_sw,"Kevin (I) Bacon",_costar)


# Stop here: nothing below this line runs when the file is executed as a script.
sys.exit(0)

# quick test:
#sw_file = "sw-examples/fred-sam-friends.sw"
#r = file_recall(sw_file,"friends","Sam")
#r = file_recall(sw_file,"friends","Fred")
#print("r:",r)
#print("-----------------------------------------------------------------")


# a single layer of the Kevin Bacon game.
# one is a superposition (kets and plain strings are also accepted)
# returns a superposition.
#
# the code does: 
#   actors movies one-superposition
# eg: 
#   actors movies |actor: Kevin Bacon>
#
def Kevin_Bacon_game(bacon_file,one):
  """Apply a single layer of the Kevin Bacon game: actors movies one.

  bacon_file: path of the sw file holding the "movies" and "actors" rules.
  one: a superposition, a ket, or a plain string (used as a ket label).

  For every element of `one` the "movies" rule is recalled from the file
  and the results are summed; then for every element of that sum the
  "actors" rule is recalled and summed. Each recall is a separate
  file_recall call, which scans the file from the start. Prints the
  length of the final sum and returns it after coeff_sort().
  """
  # Normalise the input to a superposition: str -> ket -> superposition.
  # isinstance (not an exact type() comparison) so subclasses are handled too.
  if isinstance(one, str):
    one = ket(one)
  if isinstance(one, ket):
    one = superposition() + one

#  one = one.apply_sigmoid(clean)                                 # optional to clean coeffs from our incoming sp

  movies = superposition()
  for x in one.data:
    movies += file_recall(bacon_file,"movies",x)

  actors = superposition()
  for x in movies.data:
    actors += file_recall(bacon_file,"actors",x)

  print("len:",len(actors))
  return actors.coeff_sort()


# this is the full game we are trying to replicate:
# kevin-bacon-0 |result> => actors movies |actor: Kevin Bacon>                             -- set of actors that share a movie with Kevin.
# kevin-bacon-1 |result> => actors movies actors movies |actor: Kevin Bacon>               -- set of actors one step removed from Kevin.
# kevin-bacon-2 |result> => actors movies actors movies actors movies |actor: Kevin Bacon> -- set of actors two steps removed.
# kevin-bacon-3 |result> => actors movies actors movies actors movies actors movies |actor: Kevin Bacon> -- three steps removed
# ...


# still stuck in the keep it all in memory method! Bad doggie!
#sw_bacon_file = "sw-examples/just-movies-imdb.sw"    # our imdb data
#r = ket("actor: Kevin (I) Bacon")                    # NB: we can choose any actor we like! We have the whole damn imdb to choose from!
#N = 4                                                # How deep we want to go. For now 4, but maybe 10 or bigger later!
#for k in range(N):
#  r = Kevin_Bacon_game(sw_bacon_file,r)
#  C.learn("kevin-bacon-" + str(k),"result",r)
#
#
#name = "sw-examples/kevin-bacon.sw"                  # save the results.
#save_sw(C,name)


# A version that writes each layer to disk as soon as it is computed.
sw_bacon_file = "sw-examples/just-movies-imdb.sw"    # our imdb data
sw_dest_file = "sw-examples/fast-write--kevin-bacon.sw" # where we are going to save the results

# `with` guarantees the destination file is closed even if a layer raises part-way through.
with open(sw_dest_file,'w') as dest:
  # fake the context header (the supported-ops line is deliberately left out):
  dest.write("----------------------------------------\n")
  dest.write("|context> => |context: Kevin Bacon game>\n\n")

  r = ket("actor: Kevin (I) Bacon")                    # NB: we can choose any actor we like! We have the whole damn imdb to choose from!
  N = 10                                               # How deep we want to go.
  for k in range(N):
    r = Kevin_Bacon_game(sw_bacon_file,r)
    # r.display(True) for exact dump, not str(sp) version.
    dest.write("kevin-bacon-" + str(k) + " |result> => " + r.display(True) + "\n")
  dest.write("----------------------------------------\n")