import urllib.request as urllib
import json
import datetime
import csv
import time

# """
#     text: a string
#     returns: the string with curly quotes and non-breaking spaces replaced by ASCII, UTF-8 encoded
# """
def unicode_normalize(text):
    """Replace typographic punctuation with ASCII and encode as UTF-8.

    Curly single quotes (U+2018, U+2019) become an apostrophe, curly double
    quotes (U+201C, U+201D) become a straight double quote, and a
    non-breaking space (U+00A0) becomes a regular space.

    Args:
        text: the string to clean up.

    Returns:
        The cleaned text as UTF-8 encoded ``bytes``.
    """
    replacements = str.maketrans({
        '\u2018': "'",
        '\u2019': "'",
        '\u201c': '"',
        '\u201d': '"',
        '\xa0': ' ',
    })
    cleaned = text.translate(replacements)
    return cleaned.encode('utf-8')

# """
#     url: a request url
#     returns: the data returned by calling that url
# """
def request_data_from_url(url):
    """Fetch ``url``, retrying until the server answers with HTTP 200.

    Args:
        url: the request url.

    Returns:
        The raw response body, exactly as returned by ``response.read()``.

    Note:
        There is no retry limit: the call blocks until a request succeeds.
    """
    req = urllib.Request(url)
    while True:
        try:
            # the context manager closes the connection even on a non-200 answer
            with urllib.urlopen(req) as response:
                # 200 is the success code for http
                if response.getcode() == 200:
                    return response.read()
        except Exception:
            # any failure (network error, HTTP error status, ...) is reported and retried
            print ("OOPS")

            print ("Error for URL %s: %s" % (url,datetime.datetime.now()))
            print ("retrying...")

        # wait 5 seconds before trying again so a failing endpoint is not hammered
        time.sleep(5)

# """
#     page_id: the unique id for the facebook page you are trying to scrape
#     access_token: authentication proving that you have a valid facebook account
#     returns: a python dictionary of the data on your requested page
# """
def get_facebook_page_data(page_id,access_token):
    """Request the first batch of posts for a Facebook page.

    Args:
        page_id: the unique id for the facebook page you are trying to scrape.
        access_token: the Graph API access token appended to the request.

    Returns:
        The JSON response decoded into a python dictionary.
    """
    website = "https://graph.facebook.com/v2.6/"

    location = "%s/posts/" % page_id

    # `type` and `reactions` are read by process_post, so they must be requested;
    # `created_time` is presumably the timestamp it parses -- TODO confirm.
    # the .limit(0).summary(true) is used to get a summarized count of all the
    # comments and reactions instead of getting each individual one
    fields = "?fields=message,created_time,type,name,id," + \
            "comments.limit(0).summary(true),shares," + \
            "reactions.limit(0).summary(true)"

    authentication = "&limit=100&access_token=%s" % (access_token)

    request_url = website + location + fields + authentication

    # converts facebook's response to a python dictionary to easier manipulate later
    return json.loads(request_data_from_url(request_url))

# """
#     post: information about a single post on the facebook page
#     access_token: authentication proving that you have a valid facebook account
#     returns: a tuple with the requested fields for this post
# """
def _summary_total(container,key):
    """Return ``container[key]['summary']['total_count']``, or 0 when ``key`` is absent."""
    if key not in container:
        return 0
    return container[key]['summary']['total_count']


def process_post(post,access_token):
    """Flatten one post dictionary into a row of spreadsheet-friendly values.

    Args:
        post: information about a single post on the facebook page (a dict).
        access_token: passed through to get_reactions_for_post.

    Returns:
        A 13-tuple: (post_id, post_message, post_type, post_published,
        num_reactions, num_comments, num_shares, num_likes, num_loves,
        num_wows, num_hahas, num_sads, num_angrys).

    Raises:
        KeyError: if ``post`` has no 'id', 'type' or 'created_time' entry.
    """
    # posts published after this moment get a per-type reaction breakdown;
    # the comparison is done on the formatted string, which sorts chronologically
    reactions_launch = '2016-02-24 00:00:00'

    post_id = post['id']

    post_message = ''
    if 'message' in post:
        post_message = unicode_normalize(post['message'])
        # csv.writer in text mode would render bytes as "b'...'", so hand back text
        if isinstance(post_message,bytes):
            post_message = post_message.decode('utf-8')

    post_type = post['type']

    # for datetime info,we need a few extra steps
    # first parse the timestamp, keeping its UTC offset (assumes the
    # '2016-02-24T12:00:00+0000' layout -- TODO confirm against the API response)
    published = datetime.datetime.strptime(
            post['created_time'],'%Y-%m-%dT%H:%M:%S%z')
    # NOTE(review): converts to the local zone of the machine running the script;
    # swap in a fixed offset if a specific zone is wanted
    published = published.astimezone()
    # last,convert the datetime into a string in a format convenient for spreadsheets
    post_published = published.strftime('%Y-%m-%d %H:%M:%S')

    num_reactions = _summary_total(post,'reactions')
    num_comments = _summary_total(post,'comments')
    num_shares = 0 if 'shares' not in post else post['shares']['count']

    # here we call a separate API for information about reactions based on the post's post_id
    # but only if this post is after the day when reactions first appeared on facebook
    has_reactions = post_published > reactions_launch
    reactions = get_reactions_for_post(post_id,access_token) if has_reactions else {}

    # if this post is from before reactions existed,then simply set the number of
    # likes equal to the total number of reactions
    if has_reactions:
        num_likes = _summary_total(reactions,'like')
    else:
        num_likes = num_reactions

    # get counts of all other reactions
    num_loves = _summary_total(reactions,'love')
    num_wows = _summary_total(reactions,'wow')
    num_hahas = _summary_total(reactions,'haha')
    num_sads = _summary_total(reactions,'sad')
    num_angrys = _summary_total(reactions,'angry')

    # return all the fields we asked for
    return (post_id,post_message,post_type,post_published,num_reactions,
            num_comments,num_shares,num_likes,num_loves,num_wows,num_hahas,
            num_sads,num_angrys)

# """
#     post_id: a post id corresponding to a particular post
#     access_token: authentication proving that you have a valid facebook account
#     returns: a python dictionary of information about the reactions associated with this post
# """
def get_reactions_for_post(post_id,access_token):
    """Request the per-type reaction totals for a single post.

    Args:
        post_id: a post id corresponding to a particular post.
        access_token: the Graph API access token appended to the request.

    Returns:
        The JSON response decoded into a python dictionary; presumably keyed by
        the aliases requested below ('like', 'love', 'wow', 'haha', 'sad',
        'angry') -- verify against the API response.
    """
    website = "https://graph.facebook.com/v2.6"

    location = "/%s" % post_id

    # here we ask for the number of reactions of each type associated with this post;
    # every alias given with .as(...) is a key that process_post looks up
    reactions = "/?fields=" \
            "reactions.type(LIKE).limit(0).summary(total_count).as(like)" \
            ",reactions.type(LOVE).limit(0).summary(total_count).as(love)" \
            ",reactions.type(WOW).limit(0).summary(total_count).as(wow)" \
            ",reactions.type(HAHA).limit(0).summary(total_count).as(haha)" \
            ",reactions.type(SAD).limit(0).summary(total_count).as(sad)" \
            ",reactions.type(ANGRY).limit(0).summary(total_count).as(angry)"

    authentication = "&access_token=%s" % access_token

    request_url = website + location + reactions + authentication

    # retrieve data and store in python dictionary
    return json.loads(request_data_from_url(request_url))

# """
#     page_id: the unique id for the facebook page you are trying to scrape
#     access_token: authentication proving that you have a valid facebook account
#     returns: nothing; simply prints how many posts were processed and how long it took
# """
def scrape_facebook_page(page_id,access_token,max_posts=200):
    """Scrape a page's posts into ``<page_id>_facebook_posts.csv``.

    Args:
        page_id: the unique id for the facebook page you are trying to scrape.
        access_token: the Graph API access token used for every request.
        max_posts: stop after this many posts; pass None to fetch every post.

    Returns:
        None. Progress and a final summary (post count, elapsed time) are printed.
    """
    # column names, in the same order as the tuple returned by process_post
    header = ["post_id","post_message","post_type","post_published",
              "num_reactions","num_comments","num_shares","num_likes",
              "num_loves","num_wows","num_hahas","num_sads","num_angrys"]

    # newline='' lets the csv module control line endings (avoids blank rows on Windows)
    with open('%s_facebook_posts.csv' % page_id,'w',newline='',encoding='utf-8') as file:
        w = csv.writer(file)

        # write the header row
        w.writerow(header)

        num_processed = 0
        scrape_starttime = datetime.datetime.now()

        print ( "Scraping %s Facebook Page: %s\n" % (page_id,scrape_starttime))

        # get first batch of posts
        posts = get_facebook_page_data(page_id,access_token)

        while True:
            # for each individual post in our retrieved posts ...
            for post in posts['data']:
                if max_posts is not None and num_processed >= max_posts:
                    break

                # ...get post info and write to our spreadsheet
                w.writerow(process_post(post,access_token))
                num_processed += 1

            # stop before requesting another page we would not use
            if max_posts is not None and num_processed >= max_posts:
                break

            # a response may carry 'paging' without a 'next' link; treat that as the last page
            next_url = posts.get('paging',{}).get('next')
            if not next_url:
                break
            posts = json.loads(request_data_from_url(next_url))

        print ("Completed!\n%s posts Processed in %s" % \
                (num_processed,datetime.datetime.now() - scrape_starttime))

if __name__ == '__main__':
    # prompt only when run as a script, so importing this module has no side effects
    page_id = input("Please Paste Public Page Name:")

    access_token = input("Please Paste Your access Token:")

    scrape_facebook_page(page_id,access_token)

