Estoy escribiendo una función que convierte un documento (.docx) en un archivo HTML usando OpenXMLPowerTools, pero estoy atascado con las imágenes .emf.

Todas las imágenes del documento son imágenes .emf y no se pueden convertir a html.

He investigado y parece que OpenXMLPowerTools no es compatible con esta extensión. ¿Estoy en lo cierto?

¿Alguien tiene este problema al usar estas herramientas?

Aquí está mi código actual (VB.NET):

''' <summary>
''' Converts the .docx file at the configured path to a single self-contained
''' HTML file. Images are embedded in the markup as base64 data URIs.
''' </summary>
''' <exception cref="OpenXmlPowerToolsException">
''' Thrown when an output directory is configured but does not exist.
''' </exception>
Sub Main()

    Dim strFilePath As String = "FilePath"
    Dim strOutputDirectory As String = "OutputPath"

    Dim fi As New FileInfo(strFilePath)

    ' Path.ChangeExtension is case-insensitive about the old extension and only
    ' touches the real extension, unlike a plain Replace(".docx", ".html").
    Dim destFileName As New FileInfo(Path.ChangeExtension(fi.Name, ".html"))

    If Not String.IsNullOrEmpty(strOutputDirectory) Then
        Dim di As New DirectoryInfo(strOutputDirectory)
        If Not di.Exists Then
            Throw New OpenXmlPowerToolsException("Output directory does not exist")
        End If
        destFileName = New FileInfo(Path.Combine(di.FullName, destFileName.Name))
    End If

    ' Work on an in-memory copy of the document. The converter is given an
    ' editable document and may rewrite it while converting; keeping that in a
    ' MemoryStream guarantees the source .docx on disk is never modified, and
    ' the Using blocks release every handle even when the conversion throws.
    Dim arrSource As Byte() = File.ReadAllBytes(fi.FullName)

    Using msSource As New MemoryStream()
        msSource.Write(arrSource, 0, arrSource.Length)

        Using wdDoc As WordprocessingDocument = WordprocessingDocument.Open(msSource, True)

            ' Use the document's dc:title when present, otherwise the file path.
            Dim strPageTitle As String = fi.FullName
            Dim part As CoreFilePropertiesPart = wdDoc.CoreFilePropertiesPart
            If part IsNot Nothing Then
                strPageTitle = If(CStr(part.GetXDocument().Descendants(DC.title).FirstOrDefault()), fi.FullName)
            End If

            Dim HtmlSettings As New WmlToHtmlConverterSettings

            With HtmlSettings

                .PageTitle = strPageTitle
                .FabricateCssClasses = True
                .CssClassPrefix = "pt-"
                .RestrictToSupportedLanguages = False
                .RestrictToSupportedNumberingFormats = False
                .ImageHandler = Function(imageinfo)

                                    ' All state is local to the handler so that one image
                                    ' can never inherit the format chosen for a previous one.
                                    Dim strSubtype As String = imageinfo.ContentType.Split("/"c)(1).ToLowerInvariant()
                                    Dim targetFormat As ImageFormat

                                    ' Pick the format the image is re-encoded to. GDI+ has no
                                    ' encoder for WMF/EMF, and those formats are not web image
                                    ' formats, so metafiles are rasterised to PNG.
                                    ' NOTE(review): whether OpenXmlPowerTools invokes this
                                    ' handler for EMF parts may depend on the library version.
                                    Select Case strSubtype
                                        Case "png", "x-wmf", "x-emf"
                                            targetFormat = ImageFormat.Png
                                        Case "gif", "tiff"
                                            targetFormat = ImageFormat.Gif
                                        Case "bmp"
                                            targetFormat = ImageFormat.Bmp
                                        Case "jpeg"
                                            targetFormat = ImageFormat.Jpeg
                                        Case Else
                                            ' Unsupported content type: emit no <img> element.
                                            Return Nothing
                                    End Select

                                    ' The same encoder supplies both the bytes and the MIME
                                    ' type, so the data URI label always matches its payload.
                                    Dim encoder As ImageCodecInfo =
                                        ImageCodecInfo.GetImageEncoders().FirstOrDefault(Function(c) c.FormatID = targetFormat.Guid)
                                    If encoder Is Nothing Then Return Nothing

                                    Dim strBase64 As String
                                    Using ms As New MemoryStream()
                                        imageinfo.Bitmap.Save(ms, encoder, Nothing)
                                        strBase64 = Convert.ToBase64String(ms.ToArray())
                                    End Using

                                    Dim strImageSource As String = String.Format("data:{0};base64,{1}", encoder.MimeType, strBase64)

                                    Return New XElement(Xhtml.img,
                                                        New XAttribute(NoNamespace.src, strImageSource),
                                                        imageinfo.ImgStyleAttribute,
                                                        If(imageinfo.AltText IsNot Nothing, New XAttribute(NoNamespace.alt, imageinfo.AltText), Nothing))

                                End Function

            End With

            Dim htmlElement As XElement = WmlToHtmlConverter.ConvertToHtml(wdDoc, HtmlSettings)
            Dim html As New XDocument(New XDocumentType("html", Nothing, Nothing, Nothing), htmlElement)
            Dim strHtml As String = html.ToString(SaveOptions.DisableFormatting)
            File.WriteAllText(destFileName.FullName, strHtml, Encoding.UTF8)

        End Using
    End Using

End Sub
0
Bùi Duy Phú 20 ene. 2021 a las 07:24

1 respuesta

La mejor respuesta

Ahora he convertido docx a html con imágenes emf con éxito

0
Bùi Duy Phú 2 feb. 2021 a las 10:51