Why Convert Powerpoint Files to JSON?
Microsoft Powerpoint is a well known, robust, easy to use tool for creating powerful eye-catching presentations.
You'll find PowerPoint software installed on most machines (Windows, Apple, even Android tablets).
However, on occasion, you'll want to 'extract' or 'convert' presentations into other formats.
A great target format is 'JSON' - it is easy to use and is compatible with JavaScript.
The approach taken in this tutorial, is to create a simple 'vbs' script, which runs through the ppt (or pptx) file and extracts text, images and their associated attributes (position, font and file/image).
The implementation is a work in progress - currently it handles the 'main' elements: text frames, background images, and image frames. Images are also exported as '.png' files.
JSON JavaScript (HTML) Presenter
A little test program to show the exported json data in action, is a simple Javascript program that runs in a browser and creates a presentation (that emulates what you'd see in Powerpoint).
The 'pptx-to-json.vbs' exporter script has done all the work for us :D
All you need to do now, is read in the json file (which is a single line in Javascript), then run through each of the slides and place the content on the screen (using CreateElement).
The pptx-to-json.vbs script is given below
-------------------------------------------------
' About
' CSCRIPT pptx-to-json.vbs inputfileName.ppt
' outputs converted json text to the command prompt
' redirect to a file using the '>' operator (use //nologo so the
' cscript banner is not written into the output file), e.g.
' CSCRIPT //nologo pptx-to-json.vbs inputfileName.pptx > out.json
' www.xbdev.net
' bkenwright@xbdev.net
' script is relatively robust and easy to follow
' exports text and attributes, not to mention
' images/frames, backgrounds and their visual
' data as .png files
Option Explicit

' ---------------------------------------------------------------
' Numeric values of the Office / PowerPoint enumeration members
' referenced by (or kept as documentation for) this script.
' refs:
'   http://msdn.microsoft.com/en-us/library/office/bb251061(v=office.12).aspx
'   https://wutils.com/com-dll/constants/constants-Office.htm
' ---------------------------------------------------------------

' Save format, shape type and fill type
Const ppSaveAsPDF = 32
Const msoPicture = 13
Const msoFillPicture = 6

' Horizontal anchor
Const msoAnchorNone = 1                       ' &H1
Const msoAnchorCenter = 2                     ' &H2

' Vertical anchor
Const msoVerticalAnchorMixed = -2             ' &HFFFFFFFE
Const msoAnchorTop = 1                        ' &H1
Const msoAnchorTopBaseline = 2                ' &H2
Const msoAnchorMiddle = 3                     ' &H3
Const msoAnchorBottom = 4                     ' &H4
Const msoAnchorBottomBaseLine = 5             ' &H5

' Text effect alignment
Const msoTextEffectAlignmentMixed = -2        ' &HFFFFFFFE
Const msoTextEffectAlignmentLeft = 1          ' &H1
Const msoTextEffectAlignmentCentered = 2      ' &H2
Const msoTextEffectAlignmentRight = 3         ' &H3
Const msoTextEffectAlignmentLetterJustify = 4 ' &H4
Const msoTextEffectAlignmentWordJustify = 5   ' &H5
Const msoTextEffectAlignmentStretchJustify = 6 ' &H6

' ---------------------------------------------------------------
' Script-level (global) variables
' ---------------------------------------------------------------
Dim oFSO, oPPT          ' FileSystemObject / PowerPoint application references
Dim oSlide, oShape      ' As Object
Dim ppt, scriptdir
Dim i                   ' As Integer
Dim x, linetxt          ' text-line loop index and the current line
Dim shapeno, slideno    ' 1-based loop counters
Dim bMasterShapes

' Entry point: run the Main procedure defined further down
Call Main()
' Main
' Converts the presentation named by the single command-line argument
' (resolved relative to the folder that contains this script) into a
' JavaScript object literal written to standard output, and exports slide
' backgrounds and picture shapes as .png files into an "imgs" folder that
' sits next to the presentation.
'
' Output layout:
'   json = { PageSetup: {...}, Slides: [ [ item, item, ... ], ... ] }
' where every item is a 'bimage', 'text' or 'image' record.
'
' Uses the script-level variables oPPT, oFSO, ppt, scriptdir, slideno,
' shapeno, x, linetxt and bMasterShapes.
Sub Main()
    ' MsoShapeType values of the shapes that may carry a text frame
    Const msoAutoShape = 1
    Const msoPlaceholder = 14

    Dim sInput          ' file name given on the command line
    Dim sPath           ' full path of the presentation to open
    Dim imgDir          ' folder that receives the exported .png files
    Dim oSlide, oShape  ' current slide / shape (shadow the script-level names)
    Dim oRegEx          ' sanitiser for exported text, built once
    Dim txt             ' accumulated, sanitised text of one shape
    Dim bNeedComma      ' True once the current slide array holds an item

    If WScript.Arguments.Count <> 1 Then
        WScript.Echo "You need to specify the input file, e.g. CSCRIPT //nologo pptx-to-json.vbs inputfileName.pptx > out.json"
        WScript.Quit
    End If

    Set oFSO = CreateObject("Scripting.FileSystemObject")
    scriptdir = oFSO.GetParentFolderName(WScript.ScriptFullName)

    ' The input is always resolved relative to the script folder.
    sInput = WScript.Arguments(0)
    sPath = scriptdir & "\" & sInput

    ' Fail before PowerPoint is started so no orphaned instance is left behind.
    If Not oFSO.FileExists(sPath) Then
        WScript.Echo "Input file not found: " & sPath
        WScript.Quit
    End If

    ' PowerPoint version must be 12 or later (PowerPoint 2007 or later).
    ' Only the major part is compared: CDbl("16.0") depends on the locale's
    ' decimal separator.
    Set oPPT = CreateObject("PowerPoint.Application")
    If CLng(Split(oPPT.Version, ".")(0)) < 12 Then
        WScript.Echo "PowerPoint version must be 2007 or later!"
        oPPT.Visible = True
        oPPT.Quit
        Set oPPT = Nothing
        WScript.Quit
    End If

    ' Diagnostics must not end up in the redirected JSON output.
    LogInfo "Input: " & sPath

    oPPT.Visible = True
    oPPT.WindowState = 2
    Set ppt = oPPT.Presentations.Open(sPath)

    ' Create the image folder exactly where the exports below are written.
    imgDir = ppt.Path & "\imgs"
    If Not oFSO.FolderExists(imgDir) Then
        oFSO.CreateFolder imgDir
    End If

    ' Replace every character outside the whitelist with "?" so the text can
    ' be placed inside a single-quoted JavaScript string.
    Set oRegEx = New RegExp
    oRegEx.IgnoreCase = True
    oRegEx.Global = True
    oRegEx.Pattern = "[^a-z0-9 !?@]" 'Add here every character you don't consider as special character

    WScript.Echo "json = "
    WScript.Echo "{ "
    WScript.Echo " PageSetup: {"
    WScript.Echo " SlideWidth: " & JsNum(ppt.PageSetup.SlideWidth) & ","
    WScript.Echo " SlideHeight: " & JsNum(ppt.PageSetup.SlideHeight) & ""
    WScript.Echo " },"
    WScript.Echo " Slides: ["

    For slideno = 1 To ppt.Slides.Count
        Set oSlide = ppt.Slides(slideno)
        WScript.Echo " ["
        bNeedComma = False

        ' --- background picture -------------------------------------------
        If oSlide.Background.Fill.Type = msoFillPicture Then
            WScript.Echo " {"
            WScript.Echo " type:" & "'bimage'" & ","
            WScript.Echo " file:" & "'BSlide" & oSlide.SlideIndex & ".png'"
            WScript.Echo " }"
            bNeedComma = True

            ' Hide the shapes and the master shapes so the exported slide
            ' image shows the background only, then restore them.
            ' ref: https://stackoverflow.com/questions/5316459/programmatically-combine-slides-from-multiple-presentations-into-a-single-presen
            If oSlide.Shapes.Count > 0 Then oSlide.Shapes.Range.Visible = False
            bMasterShapes = oSlide.DisplayMasterShapes
            oSlide.DisplayMasterShapes = False
            oSlide.Export imgDir & "\BSlide" & oSlide.SlideIndex & ".png", "PNG"
            oSlide.DisplayMasterShapes = bMasterShapes
            If oSlide.Shapes.Count > 0 Then oSlide.Shapes.Range.Visible = True
        End If

        ' --- shapes ---------------------------------------------------------
        For shapeno = 1 To oSlide.Shapes.Count
            Set oShape = oSlide.Shapes(shapeno)

            If oShape.Type = msoAutoShape Or oShape.Type = msoPlaceholder Then
                ' Nested If: VBScript's And does not short-circuit, and
                ' TextFrame must not be touched on a shape without one.
                If oShape.HasTextFrame Then
                    txt = ""
                    For x = 1 To oShape.TextFrame.TextRange.Lines.Count
                        linetxt = oShape.TextFrame.TextRange.Lines(x).Text
                        txt = txt & oRegEx.Replace(linetxt, "?") & "\\n"
                    Next

                    ' The separator is written only between emitted items, so
                    ' skipped shapes leave no holes in the array.
                    If bNeedComma Then WScript.Echo " ,"
                    WScript.Echo " {"
                    WScript.Echo " type:" & "'text'" & ","
                    WScript.Echo " txt:'" & txt & "',"
                    WScript.Echo " left:" & JsNum(oShape.Left) & ","
                    WScript.Echo " top:" & JsNum(oShape.Top) & ","
                    WScript.Echo " width:" & JsNum(oShape.Width) & ","
                    WScript.Echo " height:" & JsNum(oShape.Height) & ","
                    WScript.Echo " zorder:" & oShape.ZOrderPosition & ","
                    ' ref: https://docs.microsoft.com/en-us/office/vba/api/powerpoint.paragraphformat.alignment
                    ' ref: https://docs.microsoft.com/en-us/office/vba/api/powerpoint.textframe.verticalanchor
                    WScript.Echo " halignment: " & oShape.TextFrame.TextRange.ParagraphFormat.Alignment & ","
                    WScript.Echo " valignment: " & oShape.TextFrame.VerticalAnchor & ","
                    ' font details
                    WScript.Echo " size: " & JsNum(oShape.TextFrame.TextRange.Font.Size) & ","
                    WScript.Echo " font: '" & oShape.TextFrame.TextRange.Font.Name & "',"
                    WScript.Echo " bold: " & oShape.TextFrame.TextRange.Font.Bold & ","
                    WScript.Echo " rgb: " & oShape.TextFrame.TextRange.Font.Color.RGB & ","
                    ' bullet details
                    WScript.Echo " bulletvisible: " & oShape.TextFrame.TextRange.ParagraphFormat.Bullet.Visible & ","
                    WScript.Echo " bulletsize: " & JsNum(oShape.TextFrame.TextRange.ParagraphFormat.Bullet.RelativeSize) & ","
                    WScript.Echo " bulletcolor: " & oShape.TextFrame.TextRange.ParagraphFormat.Bullet.Font.Color.RGB
                    WScript.Echo " }"
                    bNeedComma = True
                End If

            ElseIf oShape.Type = msoPicture Then
                If bNeedComma Then WScript.Echo " ,"
                WScript.Echo " {"
                WScript.Echo " type: " & "'image'" & ","
                WScript.Echo " file: " & "'Slide" & oSlide.SlideIndex & "-" & shapeno & ".png'" & ","
                WScript.Echo " left:" & JsNum(oShape.Left) & ","
                WScript.Echo " top:" & JsNum(oShape.Top) & ","
                WScript.Echo " width:" & JsNum(oShape.Width) & ","
                WScript.Echo " height:" & JsNum(oShape.Height) & ","
                WScript.Echo " zorder:" & oShape.ZOrderPosition
                WScript.Echo " }"
                bNeedComma = True

                oShape.Export imgDir & "\Slide" & oSlide.SlideIndex & "-" & shapeno & ".png", 2, , , 1
            End If
        Next ' oShape

        WScript.Echo " ]"
        If slideno < ppt.Slides.Count Then
            WScript.Echo " ,"
        End If
    Next ' oSlide

    WScript.Echo " ]"
    WScript.Echo "}"

    oPPT.Quit
    Set oPPT = Nothing
End Sub

' JsNum
' Returns v as text with "." as the decimal separator. String conversion in
' VBScript follows the user's locale, and a decimal comma would break the
' generated JavaScript literal.
Function JsNum(v)
    JsNum = Replace(CStr(v), ",", ".")
End Function

' LogInfo
' Writes a diagnostic message without polluting the JSON on standard output:
' to stderr when hosted by cscript.exe, otherwise through WScript.Echo
' (the standard streams are not available under wscript.exe).
Sub LogInfo(message)
    If InStr(1, WScript.FullName, "cscript", vbTextCompare) > 0 Then
        WScript.StdErr.WriteLine message
    Else
        WScript.Echo message
    End If
End Sub
' end of script
Example (Cats)
To test out the converted/extract ppt to json - I wrote a simple Javascript 'presentation' page.
The code is functional: it loads and displays the text and images, and lets you scroll through the slides using a set of buttons in the top left of the screen, or the cursor keys (left/right on the keyboard).
Here is the working 'cat.pptx' presentation converted to a 'test.json' file.
Web Presentation
To see the Web Presentation Javascript (parses JSON data and displays the presentation in a webpage), simply select 'view source' in your browser.
Future Features
The current example has the 'core' components - everything you need to create a high quality state of the art web presentation tool.
Some ideas to juice up the implementation, include:
* pre-loading images or fonts
* transition animations between slides
* support extra formats (svgs)
* allow embedded iframes (great tool for interactive web presentations)
* write a powerpoint 'importer', so you can go back the other way (json-to-ppt.vbs)
* allow videos to be embedded into the web slides
* support code syntax highlighting (e.g., highlighter.js package - plug and play)
* markdown (or some other readable syntax) input system for your presentation - so users could create slides using a simple text based language (converted to json or parsed on the fly)
|