# Convert Mac Pages to HTML in PowerShell
# A very crude Mac Pages to HTML converter in PowerShell
# Preserves no formatting save for line breaks.
# Useful to extract the text so you can paste into Word
# A .pages file is a Zip; extract it to get the index.xml file
# and parse out the paragraphs.
# Note there's also a preview.pdf in the zip if all you need is to print.
#
# Pass the .pages file on the command line
# .\Pages-ToHtml.ps1 '.\Path\To\File.pages' > Converted.html
#
# Requires PSCX
Import-Module PSCX
# The .pages path is the first command-line argument. Fail early with a
# usage message rather than handing a null path to Read-Archive.
if (-not $args[0]) {
    throw "Usage: .\Pages-ToHtml.ps1 '.\Path\To\File.pages' > Converted.html"
}
# Locate the index.xml entry inside the zip archive and extract it;
# -PassThru hands the extracted item on so it can be read below.
$file = (Read-Archive $args[0] -format Zip ) | Where-Object { $_.Path -ieq "index.xml" } | Expand-Archive -PassThru
$xml = [xml](Get-Content $file)
# Walk down to the element that holds the paragraph (<p>) nodes.
$layout = $xml.document."text-storage"."text-body".section.layout
# Stripped of all formatting
#$layout.InnerText
# Write paragraphs only
# NOTE(review): the paragraph text is emitted as-is, without HTML-escaping;
# '<' or '&' in the document would end up raw in the output - confirm whether
# that matters for the intended paste-into-Word use.
$layout.p | ForEach-Object {
echo "<p>"
$_."#text"
# The closing tag must be on the same line as echo: a bare `echo`
# (Write-Output) has a mandatory InputObject and would prompt or error.
echo "</p>"
}
No comments:
Post a Comment