#! /usr/bin/env python
# -*- coding: utf-8 -*-
#####################      A U T H O R        ##########################
#   Copyright (C) 2010 Jack Desert                                     #
#   jackdesert556@gmail.com                                            #
#   http://www.LetsEATalready.com                                      #
#                                                                      #
######################     L I C E N S E      ##########################
#   This program is free software; you can redistribute it and/or      #
#   modify it under the terms of the GNU General Public License        #
#   as published by the Free Software Foundation; either version 2     #
#   of the License, or any later version.                              #
#                                                                      #
#   This program is distributed in the hope that it will be useful,    #
#   but WITHOUT ANY WARRANTY; without even the implied warranty of     #
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      #
#   GNU General Public License for more details.                       #
#                                                                      #
#   You should have received a copy of the GNU General Public License  #
#   along with this program; if not, write to                          #
#                                                                      #
#   the Free Software Foundation, Inc.,                                #
#   59 Temple Place - Suite 330,                                       #
#   Boston, MA   02111-1307, USA.                                      #
#                                                                      #
#######################  D E S C R I P T I O N  ########################
#   This program allows you to post to your WordPress blog right from  #
#   LyX. The input to this script is the LyXHTML output from LyX 2.0.  #
#   This script will connect using xml-rpc.                            #
#                                                                      #
###################  I N S T A L L A T I O N   #########################
#   Python is required. Tested with version 2.6.4                      #
#   Required library: wordpresslib.py (included in .tar.gz file)       #
#   To install the wordpresslib.py library, enter the command          #
#   $ python setup.py install                                          #
#   To install LyxBlogger, enter the command                           #
#   $ sudo cp lyxblogger /usr/bin/.                                    #
#                                                                      #
#########################  R U N N I N G   #############################
#   To invoke LyxBlogger, first generate (x)html by previewing LyXHTML #
#   from LyX 2.0, or by either previewing or manually invoking eLyXer. #
#   Then cd to the directory where the (x)html files and images reside #
#   The location is shown in your browser window when you preview.     #
#   Invoke LyxBlogger as                                               #
#   $ lyxblogger input_file.(x)html                                    #
#                                                                      #
###############    C O N V E R T E R    S E T U P  #####################
#   LyxBlogger can be run from command line or as a converter from     #
#   within LyX. To set up LyxBlogger as a converter in LyX, do the     #
#   following:                                                         #
#   -------------------------------------------------------------------#
#   --------------  LyX 2.0 (internal) LyXHTML Setup   ----------------#
#   To install this script to convert from the LyX 2.0 internal xhtml  #
#   format known as LyXHTML, open LyX 2.0 and  go to                   #
#   Tools-> Preferences-> File Handling-> File Formats.                #
#   Create a file format. Name it something descriptive like           #
#   'LyxBlogger Publish LyXHTML to WordPress'                          #
#   Make sure you check 'Document Format'                              #
#   Give it a unique 'Short name' and 'Extension'                      #
#   Then click on Converters and define a new converter as follows:    #
#   From_Format: LyXHTML                                               #
#   To_Format: 'LyxBlogger Publish LyXHTML to WordPress'               #
#   Converter: lyxblogger $$i                                          #
#   Now you can invoke this converter from within LyX 2.0 by clicking  #
#   File-> Export-> 'LyxBlogger Publish LyXHTML to WordPress'          #
#   -------------------------------------------------------------------#
#   ----------        e L y X e r   S e t u p        ------------------#
#   To install this script to convert from the eLyXer html format,     #
#   open LyX (Tested with version 2.0, but should work 1.6 as well),   #
#   First install eLyXer (tested with versions 0.41 and 0.42) and      #
#   set it up as a converter in LyX. For help installing eLyXer,       #
#   see  http://www.nongnu.org/elyxer/                                 #
#   Find out what the format is called in LyX. I named mine            #
#   eLyXer_HTML to avoid confusion.                                    #
#   Now go to Tools-> Preferences-> File Handling-> File Formats.      #
#   Create a new file format. Name it something descriptive like       #
#   'LyxBlogger Publish eLyXer_HTML to WordPress'                      #
#   Make sure you check 'Document Format'                              #
#   Give it a unique 'Short name' and 'Extension'                      #
#   Then click on Converters and define a new converter as follows:    #
#   From_Format: eLyXer_HTML                                           #
#   To_Format: 'LyxBlogger Publish eLyXer_HTML to WordPress'           #
#   Converter: lyxblogger $$i                                          #
#   Now you can invoke this converter from within LyX 2.0 by clicking  #
#   File-> Export-> 'LyxBlogger Publish eLyXer_HTML to WordPress'      #
#                                                                      #
##################   A U T O    L O G     I N   ########################
#   To facilitate easier logging in, consider putting your             #
#   credentials in the USER DEFINED VARIABLES section  and setting     #
#   AUTO_LOGIN to True. For example, I have a blog  at                 #
#   http://zippermania.wordpress.com just for the purpose of testing   #
#   this code.                                                         #
#                                                                      #
#   AUTO_URL = 'http://zippermania.wordpress.com/xmlrpc.php'           #
#   AUTO_USER = 'lyxbloggertest'                                       #
#   AUTO_PASSWORD = 'lyxblogger'                                       #
#   AUTO_LOGIN = True                                                  #
#                                                                      #
#   Note that the AUTO_URL must end in '/xmlrpc.php'                   #
#   I invite you to test this script first with the                    #
#   credentials as given. Once you can post to zippermania, then       #
#   change the base URL to your own url.                               #
#   Alternatively, set                                                 #
#   AUTO_LOGIN = False                                                 #
#   To be prompted for your base url, username, and password.          #
#                                                                      #
#                                                                      #
###########   U S E R    D E F I N E D    V A R I A B L E S   ##########
# XML-RPC endpoint of the blog to post to (must end in '/xmlrpc.php').
AUTO_URL = 'http://zippermania.wordpress.com/xmlrpc.php'
# WordPress account name used when AUTO_LOGIN is True.
AUTO_USER = 'lyxbloggertest'
# Password for AUTO_USER. NOTE: kept in plain text in this file.
AUTO_PASSWORD = 'lyxblogger'
# True:  log in with the three AUTO_* values above, without prompting.
# False: prompt for the blog URL, username, and password at run time.
AUTO_LOGIN = True
#                                                                      #
###########  T H E    R E S T    O F    T H E    C O D E ###############
import os
import re
import subprocess
import sys
from getpass import getpass

import wordpresslib


# Bail out with a usage hint instead of an IndexError traceback when the
# input file argument is missing.
if len(sys.argv) < 2:
    sys.exit('Usage: %s input_file.(x)html' % sys.argv[0])

THIS_FILE = sys.argv[0]
error_msg = ''
input_file = sys.argv[1]    # Incoming file name
IMAGE_DIR = ''            # Empty until defined otherwise
# Marker passed as the second argument when this script re-launches itself
CALLED_FROM_XTERM = 'string_of_text_to_pass_when_called_from_xterm'

# If already called from xterm, run the program as normal.
# Otherwise, call the program from xterm so it's visible
if not (len(sys.argv) >= 3 and sys.argv[2] == CALLED_FROM_XTERM):
    # Spawn a new xterm window to run this program in
    # -hold means leave window open after process completes
    # -fg is foreground color
    # -bg is background color
    # -fn is font (size)
    # -e means call a program
    # The command is handed over as an argument list (no shell involved),
    # so an input path containing spaces or shell metacharacters reaches
    # the relaunched script unchanged.
    try:
        subprocess.call([
            'xterm', '-hold', '-fg', 'gold', '-bg', 'black', '-fn', '10x20',
            '-e', THIS_FILE, input_file, CALLED_FROM_XTERM,
        ])
    except OSError:
        # Raised when the xterm executable itself cannot be started
        sys.exit('LyxBlogger could not start xterm. Is xterm installed?')
    sys.exit(0)     # Exit so program is not repeated.

# Greeting banner shown at the top of the terminal window
for banner_line in ('LYXBLOGGER',
                    'Welcome to LyxBlogger',
                    'Author: Jack Desert',
                    'Website: LetsEATalready.com\n'):
    print(banner_line)

# Image paths are resolved relative to the directory part of input_file,
# so keep everything up to and including the last '/'.
# (The earlier regex required at least two characters before the slash and
# crashed with AttributeError on inputs like 'a/post.html' or '/post.html'.)
if '/' in input_file:
    IMAGE_DIR = input_file.rpartition('/')[0] + '/'

# Define Which Format to Use
# The file extension decides which generator's markup the later parsing
# steps expect: .xhtml -> LyXHTML (ENGINE_INTERNAL), .html -> eLyXer.
print("FORMAT")
ENGINE_ELYXER, ENGINE_INTERNAL = False, False
if input_file.endswith('.xhtml'):
    print("Input file ends in xhtml. Assuming this came from LyXHTML")
    ENGINE_INTERNAL = True
elif input_file.endswith('.html'):
    print("Input file ends in html. Assuming this came from eLyXer")
    ENGINE_ELYXER = True
else:
    # Real line breaks here; the message previously ended in a literal '/n/n'
    error_msg += 'Error: Input file must be of type .xhtml or .html\n\n'
    raise Exception(error_msg)

# Read data from file
# The with-block closes the file even when read() raises.
with open(input_file, 'r') as f:
    html = f.read()

# RECORD TITLE FROM HEADER TO USE AS POST
# The title text is captured by the group, so no fixed-offset slicing of
# the surrounding tags is needed. '.+?' also accepts a one-character
# title, and DOTALL lets a title wrapped across several lines match.
tit_exp = re.compile(r'''
    <title>         # Start of the <title> tag
    (.+?)           # The title text itself (non-greedy)
    </title>        # Closing </title> tag
    ''', re.VERBOSE | re.DOTALL)    # VERBOSE allows ''' '''
tit_obj = tit_exp.search(html)

print("\nTITLE")
# Collapse runs of whitespace (including line breaks) to single spaces
# and drop leading/trailing whitespace.
blog_title = ' '.join(tit_obj.group(1).split()) if tit_obj else ''
if not blog_title:
    # No <title> tag, or one holding only whitespace: ask the user
    print('No title found in document.')
    print('Please enter a title now')
    blog_title = sys.stdin.readline().replace('\n', '')
print('Using title: ' + blog_title)


# REMOVING TITLE FROM BODY
# The title is sent separately as the post title, so the <h1 class="title">
# element is removed from the document text.
# Typical body title using ENGINE_INTERNAL:
#   <h1 class="title"><a id='magicparlabel-309' />
#   Example Article Title</h1>
#   <h1 class="title">
# Typical body title using ENGINE_ELYXER using optional sizing:
#   <h1 class="title">
#   <span class="footnotesize">Hi Brian</span>
#
#   </h1>
body_title_exp = re.compile('''
    <h1\                   # Beginning of tag with space
    class="title">         # The rest of the tag
    ..{1,}?                # Anything (non-greedy)
    </h1>                  # Closing tag
    ''', re.VERBOSE | re.DOTALL)                 # .. can include linebreaks
body_title_match = body_title_exp.search(html)
if body_title_match is not None:
    # Every occurrence of the exact matched text is removed
    html = html.replace(body_title_match.group(), '')
    print("Title removed from body of post.")


# Keep only what lies between the <body> and </body> tags
START_TAG = '<body>'
END_TAG = '</body>'
# split(tag, 1)[-1] is the text after the first START_TAG, or the whole
# string when the tag is absent; split(tag, 1)[0] is the text before the
# first END_TAG, or the whole string when that tag is absent.
html = html.split(START_TAG, 1)[-1].split(END_TAG, 1)[0]

# Ordered (search text, replacement) pairs applied to the post body:
#   - turn the escaped &lt;code&gt; / &lt;/code&gt; sequences back into tags
#   - remove the arrows from footnote and margin note brackets
for old_text, new_text in (
        ('&lt;code&gt;', '<code>'),
        ('&lt;/code&gt;', '</code>'),
        ('[→', '['),
        ('→]', ']')):
    html = html.replace(old_text, new_text)


# Strip off cut material using the flag '#! CUT MATERIAL'
CUT_FLAG = '#! CUT MATERIAL'
for notice in (
        "\nCUT_FLAG",
        "Anything placed after the CUT_FLAG in your document will not be uploaded.",
        "This is helpful for keeping notes that you might put back in a later draft."):
    print(notice)

# The flag only counts at the start of a paragraph, so the pattern is the
# engine-specific paragraph opening followed by the flag text.
if ENGINE_INTERNAL:
    # INTERNAL uses a magicparlabel-num
    cut_prefix = '<div class="\D{1,}?"><a id=\'magicparlabel-\d{1,}\' />\n'
elif ENGINE_ELYXER:
    # ELYXER may put a <span> tag in if you change the size
    cut_prefix = '<div class="\D{1,}?">\n(<span class="\D{1,}?">){0,1}?'
exp = re.compile(cut_prefix + CUT_FLAG)

cut_match = exp.search(html)
if cut_match is None:
    print("Place the contents of the following line at the beginning of")
    print(" a paragraph to use it as a CUT_FLAG: ")
else:
    # Drop the flagged paragraph and everything after it
    html = html[:cut_match.start()]
    print('The Following String was found in your document and was ')
    print('successfully used as a CUT_FLAG: ')
# Both outcomes finish by showing the flag text itself
print(CUT_FLAG + '\n')




# Decide which blog endpoint and credentials to use: either the AUTO_*
# settings from the top of the file, or values typed in by the user.
if AUTO_LOGIN:
    wordpress_url = AUTO_URL
    user = AUTO_USER
    password = AUTO_PASSWORD
else:
    print("URL")
    print("Please enter your WordPress URL")
    print("Example: cool_site.wordpress.com")
    wordpress_url = sys.stdin.readline().strip()
    # Honour an explicitly typed https:// (it used to end up as
    # 'http://https://...'); plain http:// stays the default.
    scheme = 'http://'
    if wordpress_url.lower().startswith('https://'):
        scheme = 'https://'
    wordpress_url = re.sub(r'(?i)^https?://', '', wordpress_url)
    wordpress_url = wordpress_url.replace('www.', '')
    # Accept a pasted endpoint or a trailing slash without producing
    # '/xmlrpc.php/xmlrpc.php' or '//xmlrpc.php'.
    if wordpress_url.endswith('/xmlrpc.php'):
        wordpress_url = wordpress_url[:-len('/xmlrpc.php')]
    wordpress_url = wordpress_url.rstrip('/')
    wordpress_url = scheme + wordpress_url + '/xmlrpc.php'
    print("The page we'll be talking is " + wordpress_url)
    print("\nUSERNAME")
    print("Please enter your WordPress username")
    user = sys.stdin.readline().replace('\n', '')
    print("Username is " + user + '.')
    print("\nPASSWORD")
    print("Please enter your WordPress password")
    password = getpass()    # Reads the password without echoing it
    print("Thank you.")

# prepare client object
wp = wordpresslib.WordPressClient(wordpress_url, user, password)

# select blog id
wp.selectBlog(0)


print('\nCATEGORY')
print('Retrieving Categories From Server')
# NOTE(review): assumes getCategoryList() returns an indexable sequence
# with a length; the original code already indexed it -- confirm.
cat_list = wp.getCategoryList()
for cat_counter, cat in enumerate(cat_list, 1):
    print(str(cat_counter) + '.  ' + cat.name)

# Ask until a listed number is entered. Only a failed int() conversion is
# treated as bad input, so Ctrl-C still interrupts the program.
cat_id = None
while True:
    print('Please enter the NUMBER next to the category for this post')
    cat_response = sys.stdin.readline()
    if not cat_response:
        # readline() returns '' at end of input; asking again would loop forever
        sys.exit('No category selected (end of input).')
    try:
        cat_number = int(cat_response.strip())
    except ValueError:
        cat_number = 0      # Falls through to the "not understood" message
    # Explicit range check: 0 and negative numbers would otherwise index
    # the list from its end and silently pick the wrong category.
    if 1 <= cat_number <= len(cat_list):
        chosen_cat = cat_list[cat_number - 1]
        cat_id = chosen_cat.id
        print('Category Selected: ' + chosen_cat.name + '\n')
        break
    print("Category Response Not Understood.\n")




# Upload every locally referenced image and point the document at the
# uploaded copy.
#
# img_exp finds the next <img> tag whose src is still a local file;
# add_exp pulls the src attribute out of that tag. Both are compiled once
# here instead of on every pass through the loop.
# The only difference between the two engines' src attributes is single
# vs double quotes.

if ENGINE_INTERNAL:
    # INTERNAL img tags look something like this:
    # <img src='0_home_jd_Escritorio_rv-8_tiny.jpg' alt='image: 0_home_jd_Escritorio_rv-8_tiny.jpg' />
    img_exp = re.compile(r"""
        <img\ src='     # The beginning of an <img> tag -- note the escaped space in the verbose regex
        (?!https?://)   # Negative lookahead: an http:// or https:// src was already changed to a web reference
        ..*?            # Non-greedy (short as possible match) of stuff in middle
        />              # The closing of the <img> tag
        """, re.VERBOSE)
    add_exp = re.compile(r"""
        src='   # The beginning of the address
        ..*?    # Non-greedy rest of the address
        '       # The (first) closing (single) quotation mark
        """, re.VERBOSE)
elif ENGINE_ELYXER:
    # eLyXer img tags looks something like this:
    # <img class="embedded" src="rv-8_tiny.jpg" alt="figure rv-8_tiny.jpg" style="max-width: 2048px; max-height: 1536px; "/>
    # Notice ELYXER uses double quotes instead of single quotes within the tag.
    img_exp = re.compile(r'''
        <img\ class="embedded"\          # The beginning of an <img> tag -- note two escaped spaces
        src="           # Note use of double quotes instead of single
        (?!https?://)   # Negative lookahead: an http:// or https:// src was already changed to a web reference
        ..*?            # Non-greedy (short as possible match) of stuff in middle
        />              # The closing of the <img> tag
        ''', re.VERBOSE)
    add_exp = re.compile(r'''
        src="   # The beginning of the address
        ..*?    # Non-greedy rest of the address
        "       # The (first) closing (double) quotation mark
        ''', re.VERBOSE)

img_obj = img_exp.search(html)
imageSrc = None
if img_obj:
    print('IMAGES\nFirst We\'ll Upload Your Images')
while img_obj:
    img_tag = img_obj.group()
    # Find local address of image
    add_obj = add_exp.search(img_tag)
    if add_obj is None:
        print("Error parsing img tag: " + img_tag)
        raise Exception("LyxBlogger failed to find src attribute in <img> tag")
    # Strip off the leading src=<quote> and the closing quote
    short_address = add_obj.group()[5:-1]
    local_path = IMAGE_DIR + short_address

    filesize = str(os.path.getsize(local_path) // 1024) + ' kB'
    print("Uploading image: " + short_address + '.  Size: ' + filesize)
    # upload image for post
    imageSrc = wp.newMediaObject(local_path)
    # Every occurrence of the local name in the document is swapped for
    # the value returned by the upload.
    html = html.replace(short_address, imageSrc)
    # Search again from the top: the tag just handled no longer matches
    # once its src starts with http:// or https://.
    img_obj = img_exp.search(html)


# Build the post from the title and the processed document body
post = wordpresslib.WordPressPost()
post.title, post.description = blog_title, html
# The chosen category id is handed over as a one-element tuple
post.categories = (cat_id,)

# Publish the post, reporting the size of the input file first
print('\nWORDS\nNow We\'ll Upload Your Thoughts')
input_size_kb = os.path.getsize(input_file) / 1024
print("Uploading xhtml: " + input_file + '.  Size: ' + str(input_size_kb) + ' kB')
# NOTE(review): the second argument is presumably a "publish now" flag --
# confirm against wordpresslib.
idNewPost = wp.newPost(post, True)

# Closing messages, including hints for the xterm window that stays open
for closing_line in ('\nSUCCESS!',
                     'Upload Successful.',
                     'Thank you for using LyxBlogger.\n',
                     "SHIFT + PAGE UP scrolls screen",
                     "ALT + F4 closes this window"):
    print(closing_line)


