#!/usr/bin/python

import urllib2
import re, htmlentitydefs
import sys, getopt, os

def unescape(text, dont_process_named_entities=False):
    """Replace HTML/XML character references in ``text`` with characters.

    Numeric references (``&#65;``, ``&#x41;``, ``&#X41;``) are always
    decoded.  Named entities (``&amp;``) are decoded through
    ``htmlentitydefs.name2codepoint`` unless
    ``dont_process_named_entities`` is true.  Any reference that cannot
    be decoded (unknown name, malformed or out-of-range number) is left
    in the output exactly as it appeared.

    Returns the string produced by ``re.sub`` over ``text``.
    """
    def fixup(match):
        entity = match.group(0)
        if entity[:2] == "&#":
            # Numeric character reference.
            try:
                if entity[:3].lower() == "&#x":
                    return unichr(int(entity[3:-1], 16))
                return unichr(int(entity[2:-1]))
            except (ValueError, OverflowError):
                # ValueError: not a number, or a code point unichr rejects.
                # OverflowError: number too large to convert to a C int.
                pass
        elif not dont_process_named_entities:
            # Named entity.
            try:
                return unichr(htmlentitydefs.name2codepoint[entity[1:-1]])
            except KeyError:
                pass
        return entity  # leave as is

    return re.sub(r"&#?\w+;", fixup, text)


def yahoo_get_cookie(login, password):
    """Log in to Yahoo and return the cookie string for later requests.

    Two requests are made through ``yahoo_url_request``: the first
    exchanges ``login``/``password`` for a token, the second exchanges
    that token for cookie data.  The returned value is the third and
    sixth whitespace-separated fields of the second response, joined.

    Raises ``Exception('Could not fetch login token')`` or
    ``Exception('Could not fetch login cookie')`` when the corresponding
    step fails or its response does not have the expected layout.
    """
    # Imported locally so the block does not depend on a new file-level import.
    import urllib

    # Percent-encode the credentials: characters such as '&', '#', '+' or
    # spaces would otherwise change the meaning of the query string.
    token_url = (
        "https://login.yahoo.com/config/pwtoken_get?src=ymsgr&login=%s&passwd=%s"
        % (urllib.quote(login, safe=''), urllib.quote(password, safe=''))
    )

    try:
        token_data = yahoo_url_request(url=token_url)
        # The token follows "ymsgr=" in the second whitespace-separated field.
        token = token_data.split()[1].split('ymsgr=')[1]
        if not token:
            raise ValueError('empty token')
    except Exception:
        raise Exception('Could not fetch login token')

    try:
        cookie_data = yahoo_url_request(
            url="https://login.yahoo.com/config/pwtoken_login?src=ymsgr&token=%s" % token
        )
        fields = cookie_data.split()
        # A short response raises IndexError here and is reported below.
        cookie = "%s%s" % (fields[2], fields[5])
    except Exception:
        raise Exception('Could not fetch login cookie')

    return cookie


def yahoo_fetch_notepad(cookie, return_as_xml=False):
    """Download the Yahoo! Notepad contents using ``cookie``.

    ``cookie`` is sent as the ``Cookie`` request header.

    When ``return_as_xml`` is true, the response is decoded as UTF-8 and
    returned as one string with numeric character references resolved
    (named entities are left untouched).

    Otherwise the response is scanned with regular expressions and a
    dict is returned:

    * ``'folders'``: dict mapping folder id to folder name.
    * ``'notes'``: list of dicts with keys ``'folder'`` (folder id),
      ``'note'`` (unescaped note text) and ``'name'`` (the first
      non-empty line of the note, or an empty string if there is none).

    Raises ``Exception('Could not fetch notepad data')`` when the request
    fails or returns an empty body.
    """
    try:
        notepad_data = yahoo_url_request(
            url="http://api.notepad.yahoo.com/?xmlReq=dif&tags=long&type=all",
            headers={'Cookie': cookie},
        )
    except Exception:
        raise Exception('Could not fetch notepad data')
    if not notepad_data:
        raise Exception('Could not fetch notepad data')

    if return_as_xml:
        return unescape(notepad_data.decode('utf-8'), dont_process_named_entities=True)

    folder_search = re.findall('<folder [^>]+ id="([0-9]+)" [^>]+ name="([^"]+)"/>', notepad_data)
    all_folders = dict(folder_search)

    note_search = re.findall('<note [^>]+ text="([^"]+)" folder="([0-9]+)"/>', notepad_data)
    all_notes = []

    for raw_note, folder in note_search:
        note = unescape(raw_note.decode('utf-8'))
        # A note's name is its first non-empty line.  A note that decodes
        # to nothing but line breaks has no such line, so fall back to an
        # empty name instead of failing on a missing match.
        first_line = re.search(r'[^\r\n]+', note)
        name = first_line.group(0) if first_line else u''

        all_notes.append({'folder': folder, 'note': note, 'name': name})

    return {'folders': all_folders, 'notes': all_notes}


def yahoo_url_request(url="", headers=None):
    """Fetch ``url`` and return the response body.

    ``headers`` is an optional mapping of extra request headers; ``None``
    (the default) means no extra headers.  Every request is sent with a
    fixed Safari ``User-Agent``.

    A new opener with an ``HTTPCookieProcessor`` is built on every call
    and also installed as urllib2's global default opener.

    Errors raised by urllib2 while opening or reading the URL propagate
    to the caller.
    """
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor())
    urllib2.install_opener(opener)
    opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Macintosh; U; PPC Mac OSX; en) AppleWebKit/412 (KHTML, like Gecko) Safari/412')]

    request = urllib2.Request(url)
    for header_name, header_value in (headers or {}).items():
        request.add_header(header_name, header_value)

    response = opener.open(request)
    try:
        return response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()


def command_line_usage():
    """Print a one-line usage summary naming this script's file."""
    # Keep only the last path component of the invoked script.
    script_name = sys.argv[0].split(os.sep)[-1]
    print("Usage: python %s -l <yahoo_id> -p <yahoo_password> -o <output_file>" % script_name)

def command_line(argv):
    """Run the downloader from command-line arguments.

    ``argv`` is the argument list without the program name.  The options
    ``-l/--login``, ``-p/--password`` and ``-o/--output`` are all
    required.  The notepad data is fetched as XML and written, UTF-8
    encoded, to the output path.

    Exits with status 2 after printing a message when the arguments are
    invalid, the output file cannot be opened, or the download or write
    fails.
    """
    yahoo_login = None
    yahoo_password = None
    output_path = None

    try:
        opts, _unused_args = getopt.getopt(argv, "l:p:o:", ["login=", "password=", "output="])
    except getopt.GetoptError:
        command_line_usage()
        sys.exit(2)

    for opt, arg in opts:
        if opt in ('-l', '--login'):
            yahoo_login = arg
        elif opt in ('-p', '--password'):
            yahoo_password = arg
        elif opt in ('-o', '--output'):
            output_path = arg

    # Explicit check rather than assert, which is removed under -O.
    if not (yahoo_login and yahoo_password and output_path):
        command_line_usage()
        sys.exit(2)

    # Open the output first so an unwritable path is reported before any
    # network request is made.
    try:
        output_file = open(output_path, "w")
    except EnvironmentError:
        print("Could not open '%s'." % output_path)
        sys.exit(2)

    try:
        # The with-block closes the file on success and on failure.
        with output_file:
            cookie = yahoo_get_cookie(yahoo_login, yahoo_password)
            notepad_data = yahoo_fetch_notepad(cookie, return_as_xml=True)
            print("Yahoo! Notepad download complete.")

            output_file.write(notepad_data.encode('utf-8'))

        print("Wrote data to '%s'." % output_path)
    except Exception as e:
        # Format the exception itself: e.args may be empty, and for I/O
        # errors args[0] is only the errno.
        print('Error: %s' % e)
        sys.exit(2)

# Run the downloader only when this file is executed as a script, passing
# the arguments that follow the program name.
if __name__ == "__main__":
    command_line(sys.argv[1:])

