HTML to Word Document Converter using Open XML SDK

By using Open XML SDK 2.0 you can generate a Word document without using the Interop DLLs and without having to install Office on your web server. I have done it successfully with the help of my colleague Yesha Gupta, and below is the code to generate the Word document from HTML markup.
GenerateWordDocument()
/// <summary>
/// Builds a Word (.docx) document from the HTML held in <c>frontMatterSqlDataSource</c>
/// and sends it to the browser as a file download.
/// </summary>
/// <remarks>
/// The method runs under the identity of the current Windows user. It copies
/// <c>WordDocTemplate.docx</c> to a per-request working file, embeds the generated HTML in
/// that copy as an altChunk, streams the result through <c>DownloadFile</c> and finally
/// removes the working file. Any failure is reported by writing the exception message to
/// the response, exactly as before.
/// </remarks>
public void generateWordDocument()
{
    WindowsIdentity currentUserIdentity = (WindowsIdentity)User.Identity;
    WindowsImpersonationContext impersonationContext = currentUserIdentity.Impersonate();
    try
    {
        DataView view = frontMatterSqlDataSource.Select(DataSourceSelectArguments.Empty) as DataView;

        // Without at least one row there is no title and no section to render; bail out
        // before touching the file system instead of failing on Rows[0].
        if (view == null || view.Table == null || view.Table.Rows.Count == 0)
        {
            Response.Write("No content is available to generate the document.");
            return;
        }

        DataRowCollection rows = view.Table.Rows;
        string pageTitle = rows[0]["PageTitle"].ToString();

        // A GUID in the name keeps concurrent requests from the same user from fighting
        // over one working file, and keeps a stale file from an earlier failure harmless.
        string fileName = currentUserIdentity.Name.Replace("\\", "_")
            + "_" + Guid.NewGuid().ToString("N") + "_Report.docx";
        string workingCopyPath = Server.MapPath("~/assets/Templates/" + fileName);

        try
        {
            File.Copy(Server.MapPath("~/assets/Templates/WordDocTemplate.docx"), workingCopyPath, true);

            // Title first, then a header plus body markup for every section row.
            StringBuilder htmlMarkup = new StringBuilder();
            htmlMarkup.Append("<h1>").Append(pageTitle).Append("</h1>");
            foreach (DataRow row in rows)
            {
                htmlMarkup.Append("<h2>").Append(row["SectionHeader"].ToString()).Append("</h2>");
                htmlMarkup.Append(row["SectionMarkup"].ToString());
            }

            string mainhtml = "<html><head><style type='text/css'>.catalogGeneralTable{border-collapse: collapse;text-align: left;} .catalogGeneralTable td, th{ padding: 5px; border: 1px solid #999999; }</style></head><body style='font-family:Trebuchet MS;font-size:.9em;'>" + htmlMarkup.ToString() + "</body></html>";

            // Only one chunk is ever added, so the id is fixed.
            string altChunkId = "AltChunkId1";

            using (WordprocessingDocument wordDoc = WordprocessingDocument.Open(workingCopyPath, true))
            {
                MainDocumentPart mainPart = wordDoc.MainDocumentPart;

                // Import data as html content using Altchunk
                AlternativeFormatImportPart chunk = mainPart.AddAlternativeFormatImportPart(AlternativeFormatImportPartType.Html, altChunkId);

                // Encoding.UTF8 is required: per the original author's note, special
                // characters come out garbled without it.
                using (Stream chunkStream = chunk.GetStream(FileMode.Create, FileAccess.Write))
                using (StreamWriter chunkWriter = new StreamWriter(chunkStream, Encoding.UTF8))
                {
                    chunkWriter.Write(mainhtml);
                }

                AltChunk altChunk = new AltChunk();
                altChunk.Id = altChunkId;

                // Index 1 places the chunk after the template body's first child element.
                mainPart.Document.Body.InsertAt(altChunk, 1);
                mainPart.Document.Save();
            }

            DownloadFile(workingCopyPath, pageTitle.Replace(" ", "_") + "_Catalog2010-2011" + ".docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
        }
        finally
        {
            // Remove the working copy whether or not generation/download succeeded.
            // File.Delete is a no-op when the file was never created.
            File.Delete(workingCopyPath);
        }
    }
    catch (Exception ex)
    {
        Response.Write(ex.Message);
    }
    finally
    {
        // Always drop the impersonated identity, even when generation failed.
        if (impersonationContext != null)
        {
            impersonationContext.Undo();
        }
    }
}


DownloadFile()

/// <summary>
/// Streams a file from disk to the current HTTP response as an attachment and then
/// closes the response.
/// </summary>
/// <param name="completeFilePath">Physical path of the file to send.</param>
/// <param name="fileName">File name suggested to the browser in the Content-Disposition header.</param>
/// <param name="contentType">MIME type written to the response's Content-Type.</param>
/// <remarks>
/// Errors are not rethrown; the exception message is written to the response instead.
/// The response is closed in every case, so nothing can be written to it afterwards.
/// </remarks>
private void DownloadFile(string completeFilePath, string fileName, string contentType)
{
    // Buffer to read 10K bytes in chunk; reused for every read.
    byte[] buffer = new byte[10000];

    try
    {
        // Open the file.
        using (Stream iStream = new FileStream(completeFilePath, FileMode.Open,
            FileAccess.Read, FileShare.Read))
        {
            // Quote the name so spaces, commas or semicolons do not truncate it, and strip
            // the characters that would break out of the quoted string or the header line.
            string headerFileName = (fileName ?? string.Empty)
                .Replace("\"", "")
                .Replace("\r", "")
                .Replace("\n", "");

            Response.ContentType = contentType;
            Response.AddHeader("Content-Disposition", "attachment; filename=\"" + headerFileName + "\"");

            // Stop as soon as the client disconnects or the stream reports end of file.
            // Read returning 0 is the only reliable end-of-stream signal, so the loop no
            // longer depends on a precomputed byte count that could keep it spinning.
            int length;
            while (Response.IsClientConnected
                && (length = iStream.Read(buffer, 0, buffer.Length)) > 0)
            {
                // Write the data to the current output stream.
                Response.OutputStream.Write(buffer, 0, length);

                // Flush the data to the HTML output.
                Response.Flush();
            }
        }
    }
    catch (Exception ex)
    {
        // Trap the error, if any.
        Response.Write("Error : " + ex.Message);
    }
    finally
    {
        Response.Close();
    }
}


For more information, post your comments below.

Comments

  1. hi

    Is it possible to convert Word Document to html using Open XML SDK

    ReplyDelete
  2. Sorry, I'm not sure about that. You may have to check their documentation.

    ReplyDelete
  3. One drawback of this approach is that many Mac versions of Office do not support HTML altChunks. So far the only credible solution is to use Word Automation Services (part of SharePoint Server) to convert the Open XML document with chunks into a Word document in which the chunks are converted to native Word format.

    ReplyDelete

Post a Comment

Popular posts from this blog

How to: Get Top n Rows of DataView in C# asp.net