From 04619f894bf06f58bf6745e947b639a07fc565b7 Mon Sep 17 00:00:00 2001 From: Lowell Stewart Date: Tue, 25 Apr 2023 18:46:43 -0600 Subject: [PATCH 1/3] emulate Word's whitespace handling and treatment of xml:space="preserve" --- .../DocumentAssemblerTests.cs | 27 ++++++++++++++ OpenXmlPowerTools.Tests/UnicodeMapperTests.cs | 34 ++++++++++++++++++ .../DocumentAssembler/DocumentAssembler.cs | 17 +++++++-- OpenXmlPowerTools/UnicodeMapper.cs | 21 ++++++++++- TestFiles/DA240-Whitespace.docx | Bin 0 -> 16458 bytes TestFiles/DA240-Whitespace.xml | 7 ++++ 6 files changed, 103 insertions(+), 3 deletions(-) create mode 100644 TestFiles/DA240-Whitespace.docx create mode 100644 TestFiles/DA240-Whitespace.xml diff --git a/OpenXmlPowerTools.Tests/DocumentAssemblerTests.cs b/OpenXmlPowerTools.Tests/DocumentAssemblerTests.cs index cdbdb3bb..9d17eb32 100644 --- a/OpenXmlPowerTools.Tests/DocumentAssemblerTests.cs +++ b/OpenXmlPowerTools.Tests/DocumentAssemblerTests.cs @@ -156,6 +156,24 @@ public void DA259(string name, string data, bool err) Assert.Equal(4, brCount); } + [Fact] + public void DA240() + { + string name = "DA240-Whitespace.docx"; + DA101(name, "DA240-Whitespace.xml", false); + var assembledDocx = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, name.Replace(".docx", "-processed-by-DocumentAssembler.docx"))); + WmlDocument afterAssembling = new WmlDocument(assembledDocx.FullName); + + // when elements are inserted that begin or end with white space, make sure white space is preserved + string firstParaTextIncorrect = afterAssembling.MainDocumentPart.Element(W.body).Elements(W.p).First().Value; + Assert.Equal("Content may or may not have spaces: he/she; he, she; he and she.", firstParaTextIncorrect); + // warning: XElement.Value returns the string resulting from direct concatenation of all W.t elements. This is fast but ignores + // proper handling of xml:space="preserve" attributes, which Word honors when rendering content. Below we also check + // the result of UnicodeMapper.RunToString, which has been enhanced to take xml:space="preserve" into account. + string firstParaTextCorrect = InnerText(afterAssembling.MainDocumentPart.Element(W.body).Elements(W.p).First()); + Assert.Equal("Content may or may not have spaces: he/she; he, she; he and she.", firstParaTextCorrect); + } + [Theory] [InlineData("DA024-TrackedRevisions.docx", "DA-Data.xml")] public void DA102_Throws(string name, string data) @@ -487,6 +505,15 @@ private static string GetDocumentText(WmlDocument document) private const string WidePngBase64 = "iVBORw0KGgoAAAANSUhEUgAAAZAAAADICAIAAABJdyC1AAACuUlEQVR4nO3UMQ7CQBAEwT3EvxEvXz/BZKalqniCifrs7gAUvGfmnO/TNwBu7H5edxuAfyFYQIZgARmCBWQIFpAhWECGYAEZggVkCBaQIVhAhmABGYIFZAgWkCFYQIZgARmCBWQIFpAhWECGYAEZggVkCBaQIVhAhmABGYIFZAgWkCFYQIZgARmCBWQIFpAhWECGYAEZggVkCBaQIVhAhmABGYIFZAgWkCFYQIZgARmCBWQIFpAhWECGYAEZggVkCBaQIVhAhmABGYIFZAgWkCFYQIZgARmCBWQIFpAhWECGYAEZggVkCBaQIVhAhmABGYIFFzMzu2y8A5u4PQZkIj89BEMEAAAAASUVORK5CYII="; private const string TallPngBase64 = "iVBORw0KGgoAAAANSUhEUgAAAMgAAAGQCAIAAABkkLjnAAAEF0lEQVR4nO3S0QkCURAEwX1i3mLke0lcI3hVAQzz0Wd3B+72npnzPbfv8mT72devP/CfhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkhEVCWCSERUJYJIRFQlgkzu42y8yTXVnDDBu1Y983AAAAAElFTkSuQmCC"; private const string TruncatedGifBase64 = "R0lGODlhyABQAA=="; + + private static string InnerText(XContainer e) + { + return e.Descendants(W.r) + .Where(r => r.Parent.Name != W.del) + .Select(UnicodeMapper.RunToString) + .StringConcatenate(); + } + private static readonly List s_ExpectedErrors = new List() { "The 'http://schemas.openxmlformats.org/wordprocessingml/2006/main:evenHBand' attribute is not declared.", diff --git a/OpenXmlPowerTools.Tests/UnicodeMapperTests.cs b/OpenXmlPowerTools.Tests/UnicodeMapperTests.cs index 667695d2..9b0154d9 100644 --- a/OpenXmlPowerTools.Tests/UnicodeMapperTests.cs +++ b/OpenXmlPowerTools.Tests/UnicodeMapperTests.cs @@ -153,5 +153,39 @@ public void IgnoresTemporaryLayoutMarkers() // characters) should exactly match the output of UnicodeMapper: Assert.Equal(p.Value, actual); } + + private const string PreserveSpacingXmlString = +@" + + + + The following space is retained: + + + but this one is not: + + + . Similarly these two lines should have only a space between them: + + + + Line 1! +Line 2! + + + + +"; + + [Fact] + public void HonorsXmlSpace() + { + XDocument partDocument = XDocument.Parse(PreserveSpacingXmlString); + XElement p = partDocument.Descendants(W.p).Last(); + string innerText = p.Descendants(W.r) + .Select(UnicodeMapper.RunToString) + .StringConcatenate(); + Assert.Equal(@"The following space is retained: but this one is not:. Similarly these two lines should have only a space between them: Line 1! Line 2!", innerText); + } } } \ No newline at end of file diff --git a/OpenXmlPowerTools/DocumentAssembler/DocumentAssembler.cs b/OpenXmlPowerTools/DocumentAssembler/DocumentAssembler.cs index a639ce5d..8eeaf648 100644 --- a/OpenXmlPowerTools/DocumentAssembler/DocumentAssembler.cs +++ b/OpenXmlPowerTools/DocumentAssembler/DocumentAssembler.cs @@ -654,7 +654,7 @@ private class RunReplacementInfo p.Add(new XElement(W.r, para.Elements(W.r).Elements(W.rPr).FirstOrDefault(), (p.Elements().Count() > 1) ? new XElement(W.br) : null, - new XElement(W.t, line))); + new XElement(W.t, GetXmlSpaceAttribute(line), line))); } return p; } @@ -666,7 +666,7 @@ private class RunReplacementInfo list.Add(new XElement(W.r, run.Elements().Where(e => e.Name != W.t), (list.Count > 0) ? new XElement(W.br) : null, - new XElement(W.t, line))); + new XElement(W.t, GetXmlSpaceAttribute(line), line))); } return list; } @@ -1400,5 +1400,18 @@ private static string EvaluateXPathToString(XElement element, string xPath, bool return xPathSelectResult.ToString(); } + + private static XAttribute GetXmlSpaceAttribute(string textOfTextElement) + { + if (!string.IsNullOrEmpty(textOfTextElement)) + { + if (char.IsWhiteSpace(textOfTextElement[0]) || + char.IsWhiteSpace(textOfTextElement[textOfTextElement.Length - 1])) + { + return new XAttribute(XNamespace.Xml + "space", "preserve"); + } + } + return null; + } } } diff --git a/OpenXmlPowerTools/UnicodeMapper.cs b/OpenXmlPowerTools/UnicodeMapper.cs index ffc42716..2773510b 100644 --- a/OpenXmlPowerTools/UnicodeMapper.cs +++ b/OpenXmlPowerTools/UnicodeMapper.cs @@ -60,7 +60,10 @@ public static string RunToString(XElement element) // For w:t elements, we obviously want the element's value. if (element.Name == W.t) { - return (string)element; + // Emulate Word's handling of the xml:space attribute on text elements + XAttribute spaceAttribute = element.Attribute(XNamespace.Xml + "space"); + string space = spaceAttribute != null ? spaceAttribute.Value : null; + return space == "preserve" ? (string)element : IgnoreTextSpacing((string)element); } // Turn elements representing special characters into their corresponding @@ -141,6 +144,22 @@ public static string RunToString(XElement element) return StartOfHeading.ToString(); } + /// + /// Emulate the way Word treats text elements when attribute xml:space="preserve" + /// is NOT present. + /// + /// The entire content of the w:t element. + /// The corresponding text string Word would display, print, and + /// allow to be edited. + private static string IgnoreTextSpacing(string text) + { + // all whitespace at beginning and end of entire string is ignored + // if text contains line breaks, they are ignored/replaced with a single space + return string.Join(" ", + text.Split(new char[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries) + ).Trim(); + } + /// /// Translate a symbol into a Unicode character, using the specified w:font attribute /// value and unicode value (represented by the w:sym element's w:char attribute), diff --git a/TestFiles/DA240-Whitespace.docx b/TestFiles/DA240-Whitespace.docx new file mode 100644 index 0000000000000000000000000000000000000000..84f2b2286de084082f2acf54c9b0213ee2ae9a0e GIT binary patch literal 16458 zcmeHO1zQ};wjJDqy9IZ53+@iVU4y#^2=4Cg?(Xgq+#x`4hXi-tvR6+s(r~qhCEn!<5Clebdy^rp8 zCXPDvZq`=s^T0tVa{!>g_WytUAASN2iQ{tpj7Xw)63@cBbW-2y1fbc@{Dx7e6+iSe zQdEjQ$MAW*@WqoyB2gpcO_0a2I9~8&NyN_>TaFnc5LtY3Wd90D3qq4Q>k?ZZy&~-~ z6&KP+t;h|>wI*mB|MOhIZFHiU1yafXhJMmw6jrSBFmOnX`w*bRig^O(kwX<$zB%) zePcAoe9!$%K-uMIf&9<;GvJl)Q^S0!`xG!YTXI?)984rqGn=^jj>3eb&2AFY5#@v; zSRIv3tp}m45udvqmJ?Hj-|y`SYM#i2gtVh<@4;AXp1TQ<3K9Y$9Vz-C_b)mXJy|Zm zsWOC5YND%WG+l*!RQQw)mr za%=EK^*Dk4lqlx|-w}Ik#Z(vJ$8rr{?zo9N{`8-iK(@WTfdLf$Cb9&vIL#Np8abdu z!U08A&%wmXk%9hK`TwH&e^@^M^3yBgdM)}H;RP=IU;Jk}6<2$)^5qzerq{4mprExR zWl-0aEtXzic$b$!b&d?g$7ko_r#&1q#GJn;=={J*Qb7pogjsyh?bmwha04U;b`&zX zE!u6vr|;jIzKoGbQjPhCYoLcs;XuaThonsOpzcwN-0l^_nG;h@%^Q){=V#fgEZwDe zv*e^QEi76{Xnlq%;}h))!f%_z_J%d(uT5s5i;3>rsM8tvcpTrt9BDyyLyKd@$Uv46 zYghq`^P^+t*dxEYcrY}O7A6-al;O$KR2xO}^&o5?o1guHNTWe-%N937NAL=0xc*Z< zv^TVG2f+YmcfS?K%E zNQ(l5gV@Mn1RjuGSSUGUn(%_>;7IC++eBmSSm?x+3xHjDP+wIrCC13>F#9dmI3Ci8 z;@A!%bEZgEPPx4wJ}h!HDc=|sQ6O|;^Ws9PwY(wpUZnRK zb_9Xb^+Z?1X-u{sE65f08ZQdD@H_NW(XKjwjuJ0gy^~t=!VeXogYY+o$IJU6Q%7y?Mo% zj1E^?0u8XKL@p50wQ!`WrQ=*&I`6)?P^r3cJL`y;B(gAL5f{n)&}(Ke|97sm^3M}< z&14G{y-gj(d$##}*L%*@4BVJCX}i31SeMpgye0v{3v@e&M3pJ;ry$J0OHBov)X6~~ zVS?p2_7e%dwnQ}+o+Ne#;jXesN44!S9*Ac^Ujm?bI}o%HFGtw(e|RZ-%wO5|Kdc99 zVMTstPw~`PiK>VG5!{}G+g6NQM>jtjQ3UtG*xV!GfZ|`ItsprVwR*y%O64WmAy{Kv znQh8;xWXD;RE-eo0^UXMhRr=*NXQ9?n|ia3<7RI5Qgq@P*k*I*mIo2JaBM|mN4@5W z2Fk7={%)+$FF_vm!>Nb=JL9S8DR8o4QG4PC>Ib95j_=;&!~77uulg2fZ`vDT7fQ}A zLJclb2jOi`^#bbHD~jXr<0E5-hd$az`d<&WW;1bOaxK#vyX!mps^xWTZZ%D>ABp?F zmpn+{!##7T7g@WlgWr9rr*-<-_jBuwzjpkI<18%`&Mu#V@@EO^P6yJFWYz~Sf2x*l z{iPdxos92SLH-aOOYW)O&yp!AXzrqfj_CeLx@M-06cM5Oja9QH zZ-Sk#Q2$Nl*(DOq@PXQ(3mgEz13-iPA@lz*jDMBbXw$kr$$_70O_G`0_O~Ru z>)9a>fD=U{dnYMJS`cF0a}2@{mAHyl4Ud}B0hU!2zf_&Q%PV^y=?-TdS2ErchfT2A zRA@OH%#2)Cgc$wcpK3BjpN8(;gVti;F+#N4Z$p@!z{r)Dfhvs&!TIZ~9|MjcV zfk>hp_-5P2`n)qG1+mn<{mJX=4XZ|L6nly|Cwhj>I^4qf%lS#NECE3Pqc&CrNdMGPz8`8eQh8I64x%@!aYNCawxH&60R{S`s#%D8hLD3MBzV=#6kIM^{G zNFH=%9q`N-$ts6pxralY5nEys`ga8>gAkro#0P3T#&^McP||nVBvBnf_E}39Ik#*{ z(0tWo_3OBdAT+Ju!5i7JQWf9IAnUdLZn$B`LA-@R4@$kA42jTbJ}&%Z9L-Y&7S_I$V;|5kfl z_H2~LaTY5ZUc&b%asPQj3dSGx8!az2Yvp$jzI1trA3Q!j(^(ARVYSrQA*>75T#{3D zWt>YD>T~+{}u+@M?R|8hTcC5zacSNfN=)FOFSFP^)Zz8(jMoPP9Run@OD3eA9~^RjSi*+o%;S|{ z6p#4eZ%waA+e=RcuJ5nbzKpFmewfMNWyOSnmL*eNhTiiVCAbY$(*v_pIKm@6GqWGzWaFe@kisyrhAnz z7qb1>o#ox#+LiGU_o_a@j*MNKi~PrS#Po~J^YD<%6gc5VFzBj$p}59$EE19cR!PeK z$EZRP7&@2zDCnR8n`D$Q*jz(2w=T5}NiygKoW;)r&pwS>TO+lEbgC^tiOe91(>FP+ zHM#dEa%>P9r>DdM@xzDo#$>~ffQ?mI0GSZ;6;KIgB@}O~Yqi6pI*bD{dyd|mD{7ug zuHth~U;!Svs}Pgb_v%%WpapSy3P93w!G(EJ8Y*LF&CvIRFA)mhx(L3qJG}uAhQviH ztW{KoXZvB|iEjlzn`iUuzc`!onl0LDsM`b{&KR%Gca_PXPM&KCI*7GW^Oa^&K~@%)-Y8h z|4Qxl_*1V@PsQGd@T_y=`M-Mnwv=OCNWcZGNE`qF{a@6eB+)mXOy zdi`C5HLmZSE;O6S-oTwseL&W0(m~qS25n%i zrsfS!2n5IEzF#Ex%pz`GlpAw@5r~ z#9v8Po`BHn#|j9+&EF-3e0GiEe>-L8MZq3Jj=6>4_w!;vHT3>ieWlJW>>WO-Nc3cy zu%|FWEqwyxuQqEdcPzVUyW|)Ghls|QY(qF{$|?3;yY)JX=D>*f4c)IiD5ZvF4@Uz7 zCtaI$9#ii!gwYO0ax;F1^!gZ22M!TwB=Gd`bJy$D@o@8rB?j~kH(@9BuJLpfn3mMX$4F%Hz6&9bY$;g{76t@F8In-{A5AP0vk^3j5vuwe$Et-TveN2 z$s3L7*t%akZjwM0zE47|{7(7fdytQWodP2Ux9~{hW{c~e@M?+%yoDpr(sU+Ga z(8<8BPGQgxJK*-`b?zfc80$|;hTqKj$;Q8lG5a!TkDyAFHufHgCiHrq??!}w9q{gD z*hLBaj*KpcGQUAoMt}8w&`7T>+t9f1Y5nt4Xadn4s3ITS{whky>XW^sDU$iY4V~xTyN+w95ZOM~7Crw_bsMav^C}@y1 zx^2M;m&{x;?xXk<9b-iU^1vY_h=6F1W{C>Qacu7>Cq1)E1xjPLn3YNkH}ZJpBaT3= zd2+Gz0{s-RVp9`OdT>qSXC3LAeg{#3l`cdWBgS-*alt%gwl4pGtQA4^YdqSvu@_y8 zN6SR?);GXMe#-#CQHd%J35C4NkE?1cd~QW|5OY~Bb4;CC+a4)0TBH$srtyqP29T%? z>z?1W@7Spt4@(tJXAhnDMXC}*tjxivPH|e|K3AbjaWG}vb*6S;jv0PPer=hRm6*-D z7&Jsbe9NPC92$hatOz)&a(Ly!2m$ePY+amSjWO)6oH;Gw@-Eo*cpb*%dNCo#o-n1v z_^ipUvZBUqf4hNHLG45&nQbzPo)QP~8QvtPPfi$uu~)kyKULq9_FNXNdDo&5y>jy_ zyB1oUJER}Y+RcJh)_gu7YMT7BI~`KXL?VgYV;+XcH4evsKYm%x1K(PA{B>j-WI zdz)=7b>U~N6OY8jGafP<@Ap{q?EV__Xhu+tvgNX8KA50JEjP2@T*dGnS(kMik9|tc z6Wf}u9Q@?6#}oPEmKGq8wUtGeF0qNqyjAHF1)5cl9q4-ewz}^65TOL;;q%ir5r!Lc z(C-^RK|ELgX7XFNUx7ua@9Di^VIVxqOaKCTN-eu0I{- zciW<6$&pur(cmT8y;kn*XX=RL1i5a{=++w6nBjh-6R38@{of~$YnjxFk-*8L9>QN7 zB1aP^Cv%%me>nIJYS!{QoJgeR&d%U4rDx`8f*>^p#Fh@_oZpcEC^nT*gg8*Am=6pD~9XM2LESG!&r^RThawuS946$VcO0X0wp z7xQEC=OM(*#vS&(O|w~P&o{!=1%utKHE2IfaK)hPjUoFL2l$@LnBZO8ozPp}0_h6E zPKFdP(8QAtPgLpem)wj^68R{lUg+3j&rLafeCtupTuRq$DsuvNq=^#jd@D)tk6CeQRjzJCJIx8eN~2&r*K-O(elq`*wL`!#FCFU~@; z0PQmBV48w(KLQOR>AR1>CS9&}D4JIKfmVcr8@Ol8rsh_+Aol*;1FYt?J#_=dsf$`- z%!wY4q}mUi$bMz=W@|Hy&D_k}vVJ=P`l9SK2?P%xZIJ`P_TWz=Z>u8VW*8IlM9I1Q znT?{a@TtKE`RU3dq%AE!o*hbzD&as>Kr9E5#_b~3a~e{qFPhq(3JO$)(Y2^RHcJZtR+dEC#8ue-YTQC#TPdV%K z+#27Fp*#-cTsB6<Qb2*E2@}1x7~!pVETDTSr|PRZ2cKlWCs1N>pno&b zln8z?VEO`nr{nU8fFl346ltQL61E5dZxz$O2^D7Yg@?XKVyj~?!+bgAwVshbxwnD; z=YilCw7|Jn6UO!ts%7gPji-0s!El{mbfvjkC3( zi33o3{o(tjt=a6b16L>DuJPePI%S-4Ce=S7ui2*)(_yS%gP4Uj1%xCB%(y(?k~!DW za#fLHn%7OsqFw#4H|4~8t$E%%p@|~r5t(BoSC;HlDS#o?JqJUmm%&>!$T=&|RE%d1!NZgx;Lkc>gd&h@U{4;IcgDGj} zhWk3EBf6BqGOWz_jg0kHANKiGd1S0V1exkkT4j%;0UtmaX`8VmtN$d5^9 z87AXrlJya+*<@>n{$3d4E$Rp%m+vxzv`mbo3}Cd|_0*t{-nM>ZJImt8KcuC26PprR z2A|U{x)|3d4ltuFAj8_|#*-b5ugCHd5*^Kxm-pr{Ynul{YG^H(MUkV8WM<#XlpXs7 z&$@k55OPTdt?5_fvdJ#Nk6zU(w2rmyM_wL$HoUM)(DF_#p~iPneqV;IBg3gv-vE9E z|AC&DlC|-B8Gg7t%(Nz8P8gpdJk9_UeuXE?rWb~Xpgb=ABJ6v>a8DV&rT$zBPS578 zkXcTHFz>)M4$Vy};dOuFXVUQ2m|j59LN2n%{FGpRAHE&k#^`>V@C{95^JpxFuJ~u( zq@_*;Za($FtqRPDYi^@1D8?-n%+KdLK?7)2#5TzM7O38u+gewD63C zcjryW-WYHt8_)PRBg17+zZ0Db22V8d>kkWhCCp<$@p@1XiZ8*dRj6#OQ>S#7Z$QN- z`m!t5N*j7F3+HoN(}3BEkQ%w?#Fjk_afqT8xkf5@4Z~#PwUO!hDY%6Bm`Y}aLt-na zkF;dbYv1)F*EZ%&lf~OQjTT9RDZ~XXdUQ;Pe#m~Qv3Y7I2x+6#_S!A^ptm12f$|fi52_$>&24Z1A6&*!y zd=6$^Bd8I6d?L%7StC2|1tdqfN^{qLnLWx;6y+w%eE1;w>1KmS7+VRW;!c{oy<1u} zpVrA?)VsOB@sT34ltZc?SCS%o1X8fIglZsgK}#7++YVHv-q9%70#+X@jD;pq(u>3* ztxto}ac#OWH!TnQL6P~|!8-vy-?)YONNki*DoBy}Ss+gEKcOH8%V@ z(^+bjT5bt@9yxZzp#Ra=$~Q&k5-E}&Lc;T){1!oqED>o2K$p$*Ud>f(!cKJ73vc4R zl);w?ef73BO53O0G=tag-?Y-~9~K53np#}HYP_Np<(rm3yk1WX+CqnA@1U`yr_bui z0g7WyU-kwzboNj^!ouDT&2BJ}E4S|+L`BL0-Zscg7583d$TQU5*?J;Blzx(gLXQI< z#)pQGnhBz|L*Z~}@s??#qg7%*<89yh4cU-QnU+`fC(%GNCm&N>cE-#0BN5YtAzwo} zhoDq1_Zj0dz@av=nbe6mmbe*4J4a4xn>nsU3P#QR^XHIynKMvjv@r~2FvvSyklVGd z%+uH`U-mUm&I$jkD!n2!PiFzH0gJ{00Kb-k|5)gCbaJ=)$13l)OfAPXc8uXoSJQ4V zart({P`dGE(_>u1cdAUiylKi+(AR#dX>%rt+w6J@S z{rZ?pn7yHL<0w`h0v2|xL!c+4>fGt=YNs$rlNNtWr#0type&VpVz2Eo;E-z?iX4 zU5YBYty312uH0KExlH#C)P++wOZ3{@DwMcC)@+imJeXLBhj0tGYH6z43OXFAJNy*_NvFD%aOh zAMMjNu3#_gcd7WdipTS$C_E9#RJ=3_2ihFTKB(Ufs90NhXOkc83S`tiB5poJ8e_)i z@0gA!mh?_GMW7*0uxoZaaC2B1T4~yy4+bRI?kDGc_q9oRv}AQ%_f{4^TM0TzD0iuf zgW8l%&enLAh`eZ!}G?mjM-bS82{H@(rwsFF0 zPc0#&16%eEx$cp2AeK@QyAnc1qjExbZBMZTb;3iQl!pIRWVqTnFMf%8=EySh@U&6R z=~a^=E+v^)QInKcbbM#a9MBlOqd=0jgM^#5GmVnA69b#JqX3$=1MS2*j67@>MIwWo zkv3RG$?E5|A98DB^M!D0`0N7Zx#fk=hp?G{>&gDN3tToIpIjN+YS3A}xSq@+7WQ{i=Q7mPxN#B5|xVPLdNR%FLSs$&2V@=B#yG zLNmaAZJ2{gvYzrAV0-Q*d1{7FD_o<9Q3mV#XVoQI#G!dO-!D&@^shF-H+om6r9a8C z(J-fc)4pJvwMz6=%>pu`h-*oaLJ@i9h!JLsr8fAFuKZ$|VT9XhN*~YUWWkURNcGAE zB>=TQc^?LZ*?JBoE?-5*K31RxDy?`SrPM~KTVs5gcJn-MH&Kj9%+q6U)Q(ON*3;m| zvw5yDfa35TdEYtu1c%%Wg46DgySa?7T{5I=Qc$LJK&E1=Te8pIF@1*5k3G+uRO`?> z+J{{O0$HyQ25%~qWPO=jJshDkqnTc%HFFf|}Bj0FY`{Zl<6 zvJU;jdroakqxZst8NTpKGiYoROBJMhj_-GeBR<3c;EKXr)anXbO8wD~#n(Y%3@5nZ zIG0sh6y#nr>HSn>YlIeQdSIzWnTJ5>nyh*7G#u`lt-JY2saTo&d1 z*bkdq`O(Lm=L*Yl*W-k?5k=}El?6`=Gv&Uj=fESiLFLX~L&|&b&=aRr%tO|G{lhmQ zdQpJ$BrMw{>HxZ{zKYTUPD_3bUBX&Y3p&eLbSCO!-Z&>lGGdNvkok~~cZ%D$Jtk@g zWkv@FWyq7U$U~H6+iNXq4;Pu)gagYjvLKgTcp`{n8QnH1n$4T7plr|N)wmz*sHt%@ zxazmm>Rjp~)}s99B5|2GFItfOQDbnKC{ek>^OfS#nK?OHk^SQnaD69LYs7m@Xw(Uk zEA`#8tstmAtXK?C_C#Xw;@Oc*3k5Oh0^ED z?YsPSeh>;oBm7$(vRSSWxMoR~(dSQpM^O)llLWwy?*PvyXmlmd09dMjH2qhve;}nw za$1rVh#=@bh6uo+6Hg1NG!s0+p(hQ)peNlSQy1I=q5l{g5$M0$1pen3sTnY$kf>>+ zQK@M!NIpvZ1VVuW|L5TTeMib%zinOMPiRp1qqoB_d9TpfoAxIv?}fPs)Yq-#DyKqk z7TYs+#rmz1B2}BDj5$QZ&QGUIbsHg^@mG-sF;20CGW3c(^yTT=5Ao3Rfm3=_Z5Q`U zMOKD&?x$)&+V z=u1tXMo*enljRQT!#ZZ=FethE7LaS>gsSlBWcVY>Ie(hKCI49)!6eAJv0GssyrP__ zcYFb`b72v3>B1)R(27GY2K)}>#NuuDl-#WqNB>y|L2;uhh>~BezY6}uLZYV)2c@Qp zV1=@t03t(tH`cKO!#cmJ*m0%uSIi|8&V8e3aH+B>jdloo6M1qu57X3njeRe7f^ zIC}>9oHj_NXamk8VuIQZ?64kRlOkrb>dE}Sb7}c?S$IaJ)1}Heq6T?P$zmOgae(Z} zQ$Sv*bHJV%aEwhYw5-HUEW8Riqt;XnO(bRfD!6qb)9IYsa5%Ia+JQw~>mS7`KUX`9 z?*Cr@M=BxUTJn-mMd|XU*?b26_Ev(WjEk8h^ucRxG=TS zDm%|z-tuYRnah5o?bjVsWe8# z3`oDn+``ch72?I2snZD%sPE*OSO`n7xM1f@H)?pM4e}~*@*1BaZVqMOB{eIQ>l$5P zdhVB<%}J9myR*@$M~X(<*-t48hPIHOh-$Ey6B1xOxh;O*!J9R=M5-@;_-S- zVk@ry?9Q}7Cob1)#uaQoKC?*2c_OULn&b-p%-&Nikwh=`a@=N66y{DRS&`kmuTbS3 zVFQ6k|9Wid zmWEbCCMMf={JdP5I+6!t9Gv5lJX4kJ3oESR;T#Dig4xwD5y9_9KfOD{1?5ekOyIj6 z5@*-SAPjpOHb%Z#J9%L*t@PqD9@&+MYf~EkNC7bp*p6=Dk|;)5kmD5eWY%5diyt?W z_~c3&Werv#hLg??FWqi^FKoI_-ahI^s18~rfPVQR@T;Z7FSU5)9Yw$eMTv(0f4 zho)n%k>j#JeH^nNAr=j}{)TMNGvS2oWYu%VJ!Czcoc0d#Y zrsOQk!8WSUP4#P&j#C1fyJblAR~wBh=$;oB9C;`wuKCjN7n7NB*@O}n8j3%0>E3Txbj^=`E@Gms2<~f)TkJc& z9J=5j^(XTDCkn{+D$`G-x3>_fWI?o!PPM1x7CEMe;-4O2>#m^J@VpP8574~7LvNW_ zQK_P~pHb1rtUk5iXSc|-JGw*ZQt*7>sE%JLJXy#laa@|1rIPO&FB|t7)Yx^aZz!h! z+=U_K5er{XzGb5?ep_o|U@5J~b!IhjBSrOGgtSE59^dl8w1$W&^C_DzKHsQrIER<| zX~G=#bjn0V+mk)JEc+E0S@NH;xx#8ZO(VcP{AA#6J=(wK*RCdp%758uIZf;hTl*DC z71-rJ<7G5FAGKsvS3^q|s3g_y1<;5yQj|0zC6jx+#$S=!w)RlJt4r=w&)0k8H)VzI zYUejYKMQn5`I<(H2@@J%&{MclH9A!s%>fabQ5>WUAyzS}$m!7S{&rqP!=w0uw6P-2g#VIa;edlC81ZahFPH z`caElj@id;!t?m6eFzDk>iJYStw`TdNA+VWKEi7K`#3--k8OGcBdXz?tzojQ%sB}TaH>(BRh-I7A)~78)37_~ zSNQ^-j!hj#DKllJZGHpAhz~Ww0AQiN-NCx@jWyio5?9^ftVb1B{gly+?g;5KrZzz- z<$cYD(E7DLNYGIZZ6_=(%MJ>kch>W#vr&bSWgw~N2~1ZDsb4?#6Jl};+EYm|#)!i` zTAYnR%FtKLQV7yb(d6fvv{bBVW+Opkta_s|QdTFH{AMR9xaWfwY6%=r_kO$(zzLW+ z!;*}&GiMpS!;TmYpKa7*6c={cMYDj7oe`X+X51$!^Vbf|Slv;n%Yd8cF%58D=UTBaYMz0+uK%elTJ5%l z8-U6}3mD9g3=HZ8M)E2+*xETV7}+}f!5Cn=%l}960=1}Te6P*guXGQvTg0IAr~*zi zrG9%~>k%pl*m>dbb=FYX#?_?vwazbh`I*Z)euf;1o}M`p?Je%M$GZqZ4OQ#Rtw^-K zsys$({YftsE=NIR$SRf}&or0o2#DLMy1l6yrW)bwFf)REFbsu?*!Nd$S6K3EIPI#h!?^)^u=azv0w!qGAE1@_aaSbGuPd&ra|s~C97}S2F-Vf( zM)}nPrU&HVhQ9I}8`a+ZpyDR`^vJi0ecEO-V?rYJBK_*t-c6XDw(63S2&sx8c3@T; zi607PtpQVGOqa5Dca76*Pucddc12uonCsYg?)dgor3H!q|xVq@8 znBhB$Eu9)VKdNLQ-9lpN!J@cw?J?~Ty?RH#u(^#WdC>Ujj;kG)qOZcYFXD|x{ukWn zCm$=QWA_}qu65?WK&LA&y|K}kcB-^#t6g&ISx-jB>wBhHH2zbvj@O0@7cpZeVBkOE>;M(}6Sx9vRAiFufCTnIF21L(1}glc-Xk$5@FPaYBna zp=FH_zCm8i`iV1T%O!*AB6}j-sNmsTcJ16CzcriV%giKamn#=|OQ}wa6(i3_Ulym& zgJ@9&l~jOQsI6ek$Mu25;&ujCm?~WhoC}rJr!f>9#%dEf%|e>{STyCdAt&n@IW3wM z%#=Y{Dvjg{-Yy@R?0umcB8xk(fw+pJ&6@uh#*}!scqil`&=JkG zG2f4OQR73)^kE|e+88bca{~8DI<7+AR);qCh_&^#%W}dDjmeJHEOnXAQL)Y%5XM{E z?_4(Sq4qu@e93&w;iKy!g=~0dJ)fmY3@O6%keF?VZWErmG31Snhi&rdXHDd4f)_oe z(Rc*2gS2AFC|EyEuun(6G==y}_YC;hclrkbmd4x9B~x@C$iu|5pO)ke$(0|um9vbD zE4MVt_C6v|Gn||JNWl`%%5Oe=6`~Jo7Mc@!|(p^pFNnf{<8;D zU@HEuF~)DzZ;yt!h^Z~0{^G^QILiJvKs(+2YmSe1*C!Z H*Qfsh8m)u_ literal 0 HcmV?d00001 diff --git a/TestFiles/DA240-Whitespace.xml b/TestFiles/DA240-Whitespace.xml new file mode 100644 index 00000000..7c21bac1 --- /dev/null +++ b/TestFiles/DA240-Whitespace.xml @@ -0,0 +1,7 @@ + + + may or may not + / + , + and + From 11d35b14c0272bde6073a405c35be91a063b5994 Mon Sep 17 00:00:00 2001 From: Lowell Stewart Date: Thu, 8 Jan 2026 06:04:30 -0700 Subject: [PATCH 2/3] more comprehensive fix for whitespace handling in UnicodeMapper --- OpenXmlPowerTools.Tests/UnicodeMapperTests.cs | 86 ++++++++++++++++++ OpenXmlPowerTools/UnicodeMapper.cs | 54 ++++++++--- TestFiles/UM-Whitespace-Word-saved.docx | Bin 0 -> 16270 bytes TestFiles/UM-Whitespace-test.docx | Bin 0 -> 13319 bytes 4 files changed, 127 insertions(+), 13 deletions(-) create mode 100644 TestFiles/UM-Whitespace-Word-saved.docx create mode 100644 TestFiles/UM-Whitespace-test.docx diff --git a/OpenXmlPowerTools.Tests/UnicodeMapperTests.cs b/OpenXmlPowerTools.Tests/UnicodeMapperTests.cs index 9b0154d9..0d2e1e9f 100644 --- a/OpenXmlPowerTools.Tests/UnicodeMapperTests.cs +++ b/OpenXmlPowerTools.Tests/UnicodeMapperTests.cs @@ -180,6 +180,9 @@ Line 2! [Fact] public void HonorsXmlSpace() { + // This somewhat rudimentary test is superceded by TreatsXmlSpaceLikeWord() below, + // but it has been left in to provide a simple/direct illustration of a couple of + // the specific test cases covered by that more extensive suite. XDocument partDocument = XDocument.Parse(PreserveSpacingXmlString); XElement p = partDocument.Descendants(W.p).Last(); string innerText = p.Descendants(W.r) @@ -187,5 +190,88 @@ public void HonorsXmlSpace() .StringConcatenate(); Assert.Equal(@"The following space is retained: but this one is not:. Similarly these two lines should have only a space between them: Line 1! Line 2!", innerText); } + + // Verifies that UnicodeMapper.RunToString interprets whitespace in elements + // exactly the way Microsoft Word does, including honoring xml:space="preserve". + // This is essential because RunToString is used by higher‑level features + // (OpenXmlRegex, DocumentAssembler, etc.) that rely on its output to reflect the + // text an end‑user would actually see and edit in Word. + // + // Word accepts a wide range of “valid” DOCX input, but it normalizes that input + // into a canonical form when displaying or saving the document. These tests + // compare RunToString’s output against Word’s canonicalized output to ensure + // that whitespace is treated as semantic content in the same way Word treats it. + [Fact] + public void TreatsXmlSpaceLikeWord() + { + var sourceDir = new System.IO.DirectoryInfo("../../../../TestFiles/"); + // Test document: crafted to include many whitespace patterns that Word accepts as valid input + var testDoc = new System.IO.FileInfo(System.IO.Path.Combine(sourceDir.FullName, "UM-whitespace-test.docx")); + var testWmlDoc = new WmlDocument(testDoc.FullName); + var testParagraphs = testWmlDoc.MainDocumentPart + .Element(W.body) + .Elements(W.p).ToList(); + // Canonical document: the same test document after being opened and saved by Word, + // representing Word’s own normalized interpretation of that whitespace + var expectedDoc = new System.IO.FileInfo(System.IO.Path.Combine(sourceDir.FullName, "UM-whitespace-Word-saved.docx")); + var expectedWmlDoc = new WmlDocument(expectedDoc.FullName); + var expectedParagraphs = expectedWmlDoc.MainDocumentPart + .Element(W.body) + .Elements(W.p).ToList(); + // Iterate through pairs of paragraphs (test name, test content, expected result) + for (int i = 0; i < testParagraphs.Count - 1; i += 2) + { + var testNameParagraph = testParagraphs[i]; + var testContentParagraph = testParagraphs[i + 1]; + // Get the test name from the first paragraph + var testName = testNameParagraph.Descendants(W.t) + .Select(t => (string)t) + .StringConcatenate(); + // Get the actual result by calling UnicodeMapper.RunToString on the test content runs + var actualResult = testContentParagraph.Descendants(W.r) + .Select(UnicodeMapper.RunToString) + .StringConcatenate(); + // Find corresponding expected result paragraph (same index in expected document) + var expectedResult = ExtractExpectedFromWord(expectedParagraphs[i + 1]); + Assert.True( + expectedResult == actualResult, + $"Test '{testName}' failed. Expected: [{expectedResult}] Actual: [{actualResult}]" + ); + } + } + + // Extracts the expected text from Word’s canonicalized output for the whitespace tests. + // This helper intentionally handles *only* the constructs that Word emits in the saved + // version of UM-whitespace-test.docx: + // • → literal text + // • → '\t' + // • (intentionally ignored) + // If any other run-level element appears, it means Word has emitted something this test + // was not designed to handle, and the test fails loudly. This prevents the helper + // from drifting toward reimplementing UnicodeMapper.RunToString. + private static string ExtractExpectedFromWord(XElement p) + { + var sb = new System.Text.StringBuilder(); + foreach (var run in p.Elements(W.r)) + { + foreach (var child in run.Elements()) + { + if (child.Name == W.t) + { + sb.Append((string)child); + } + else if (child.Name == W.tab) + { + sb.Append('\t'); + } + else if (child.Name != W.lastRenderedPageBreak) + { + throw new System.InvalidOperationException( + $"Unexpected element <{child.Name.LocalName}> encountered in expected Word output."); + } + } + } + return sb.ToString(); + } } } \ No newline at end of file diff --git a/OpenXmlPowerTools/UnicodeMapper.cs b/OpenXmlPowerTools/UnicodeMapper.cs index 2773510b..ee4c46ae 100644 --- a/OpenXmlPowerTools/UnicodeMapper.cs +++ b/OpenXmlPowerTools/UnicodeMapper.cs @@ -61,9 +61,9 @@ public static string RunToString(XElement element) if (element.Name == W.t) { // Emulate Word's handling of the xml:space attribute on text elements - XAttribute spaceAttribute = element.Attribute(XNamespace.Xml + "space"); - string space = spaceAttribute != null ? spaceAttribute.Value : null; - return space == "preserve" ? (string)element : IgnoreTextSpacing((string)element); + XAttribute? spaceAttribute = element.Attribute(XNamespace.Xml + "space"); + string? space = spaceAttribute?.Value; + return NormalizeWhitespace((string) element, space == "preserve"); } // Turn elements representing special characters into their corresponding @@ -145,19 +145,47 @@ public static string RunToString(XElement element) } /// - /// Emulate the way Word treats text elements when attribute xml:space="preserve" - /// is NOT present. + /// Emulate the way Word interprets the content of text elements + /// depending on whether the xml:space="preserve" attribute is present. /// /// The entire content of the w:t element. - /// The corresponding text string Word would display, print, and - /// allow to be edited. - private static string IgnoreTextSpacing(string text) + /// The corresponding text string Word would display, print, save, + /// and allow to be edited. + private static string NormalizeWhitespace(string text, bool preserve) { - // all whitespace at beginning and end of entire string is ignored - // if text contains line breaks, they are ignored/replaced with a single space - return string.Join(" ", - text.Split(new char[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries) - ).Trim(); + if (string.IsNullOrEmpty(text)) + return string.Empty; + // Trim leading & trailing whitespace when NOT preserving + ReadOnlySpan span = preserve + ? text.AsSpan() + : text.AsSpan().Trim(); + if (span.Length == 0) + return string.Empty; + var sb = new System.Text.StringBuilder(span.Length); + int i = 0; + while (i < span.Length) + { + char c = span[i]; + switch (c) + { + case '\r': // CR or CRLF → space + sb.Append(' '); + if (i + 1 < span.Length && span[i + 1] == '\n') + i++; // skip LF so CRLF becomes one space + break; + case '\n': // LF → space + sb.Append(' '); + break; + case '\t': // TAB preserved or converted to space, depending on mode + sb.Append(preserve ? c : ' '); + break; + default: // SPACE or any other character → preserved exactly + sb.Append(c); + break; + } + i++; + } + return sb.ToString(); } /// diff --git a/TestFiles/UM-Whitespace-Word-saved.docx b/TestFiles/UM-Whitespace-Word-saved.docx new file mode 100644 index 0000000000000000000000000000000000000000..8e590c02c440f3b9ddead3b4563442fcc2ffb0b7 GIT binary patch literal 16270 zcmeIZWl$x{mIk_UcXw}G8h3YhcX#i`-QC@x@y6ZVT{blC&_LtRI6TgoxpU7wb0=QB z_jjfucE+yC{Jyf4xw5`oOHl?49321&fCc~nB!I1N^S0U`001!r0DuO72GtR9uy-}H zcQsJ;a5Qt#V{o^#B`yR9rOF3@ezgC;?SJtTXiT24?`J|5yH9ySN@!6t{#jT-0~*Dj zO0Rqjh3y5Z{+c+_`PPjFs-y;z0BcK1!E(RJqCOlnx0-1SgVN+iaf-#48jz%K#X-BW zyif0Eit}kFnQ2pKjNRXswX+LHmK}tNtZ5)|N(ELSyC^#dL-PR;OOq0(PVO1QLKysl z$+CUlwF#ECN#RJX&;*5tsGpAtmsG#Z=3_5Oify{$vY=^^A{%Vjh#qm`eY}{L?470< z@4Ob|LU0+2i!G3@s1XbG%azELRf%?Vn6$*rs6491TUFPit@az8{2KoDty(cfHZFkYNIn=tR-i`wYN8YETj6>{I&Lek zxXlJkf+bji$M2cLAFJ7s-=>{zL9B}Q1cots^Ciw)Vq%`R$eW#}81wV3VskX=&B(*ZfSX1B8;o34WQe-C>e}~1r?3_FCEb1v8iU^^DDS(M!eD*fi zMb&;giaf*-V80;IYBbn!z{}PXy7_QS|Gj@mpfrp0AOZk^kpKY7$3t;*a5iN$bujs2 z_u;qxa9F?ewCwS?aD9mk&xs$(q0$>$8O14McFmzA149>F{3BJHLzW=R@kdhZ7O?weO}&DOpntHdZI&fW`MT*>Cpyxy?K37rMUN>Ui%(S z?#6n2-D(Dj4h?FQWxf`F{iOupL6pz<-KYYs!u&V zSI?%smVss2@4UFT=1GtH^>1w)IA18!(t`AXT{)9adf>#C=W-TF`fPd6H^lm&gvQl?97HXB#pEk#s+B6F19yV|8&ZZK3WbsUge3-rJd?= z??SzPqz3TEg;w5g$Qykt=Q~`TG{$BJHCZ%>-LPz(eX)u_7L7JXuHz>a57}_3X2tP< zbdZ0cvuy4SNtm{v0Gz=XcF{0vem4y-j<$C&*$|p0p8u}yj9G-b56|A)FG=KXkU}#K!7!P2SQsotda+qWpfpX!Cfj z^2$`*7uPeU^YLpZ#Mev|=$72k9Rrt^r+HqYhlWlUE_k;mj+|{&h0O=@%Y|=#SZ@li zaQm*>)8j3hp|y+X$foj{7Cz8b0}bAfdwG0!Fc$B-cQBXhW}C4~3K=K$oj6N7DMJb_ zsamb&TQB-kr}7{QJVdWYIO;ilt<~mx&Ob5~CiL%QI65iUr!&`%yY-We;Hc?PPrqUXZcW$CW2O!icEXL)AoO^wfutWv5_ zZg|z6?ff2{`?`)a!q5DDlms+#aiCog!%MZs0_f8`v>mC`2Uqz8vi%I~z7{rGo0_Xm zE1#s4fTJyg4-Ch+3?nj-Zd6n_h+uHhC>;X#Bu5oYy!4jDZ2dykVYut5WXzODz=O|3 zvd4kh@9n7P{nn43&iq+1ZbG1tGpG$x!U^i#dzcaqC1r@hI6|YwGKqx-kxS??&|E|U!bU&w(qDvY?6`#U9 zBEt;oPnO|^%9*yj z2JO*Dm^@nXbStGW@{X6qCpGx`Nv;s?BA*X!iY`wB^tXmJWKcP`9b)pq=6|K~K$})DbniRBN(++fQt8%q7e3D+Z7%Fc>m6mlZJHibeZq2NXA!vviKy>e5V7 z&&+>q##&np7X?e`$4tgAbuWgk6>eXoG$^b)W4|5Z!Fn@BPJo3;=ER8-3kF2j25XHY z#f>ru6_OY!qQQuUesY2*UxhuieFS zD9n&}Y8vLrt)b>q{ATQ+MT|$(%(M6cM(r^8yst9!cSs%QkcLO#j#{gX=f*syyaVz+ ziSR=0=Fd@KCi^Cz75KDj`+paZHkm2Yx0d`MXP8E$6P6@ls?T`Y4l=Wy;Dw|WFObqz zCB){1Zdj*Hg=yUQIoFP@RL#UWVx2zvAQZ5TP42K(HM@d9JX6MC2- zuI%vyiN#|!awCiRInb(q2$Yeq$fyr}av`1TM#+><0|z+2HyR;4OU8}`-W2#LbgyG7 zgr%_w+yt&J{fqvjp%KYMgDSgov26f)BQa9gC1%*}AtKZKFjQ5M6SSlWHHKEjBg-|i z%B|R3Cl`-KGgXdS3PB5gYM;58iDi?oIq%o{gsz=33ExkA-EUn~;Sd5NlA>_o6L-rX7 zbh-3$;|7W5_eU)_68UaP&lu^SU7oy!yO=7tB=Pnt>rotaBq`vPw(xK+QY|A6b~%Qlpdk{38C%dh za^hSd4fpE5(<8WtP_4@5iT)IhFAgpsTIzGkFQ_qSSaAw3L`a=m9~c%Ul{|r7iZzne zj!I(WUdcyYU#suK-Lnlb9o{uH`xV8*X_HQT{1x=qz)J1+oVWp5Zs^2&NNrE%NxcN` zx;I$tu4>Lb;vJ?*hHv`bbJdWHRBWt|h-(O>9bZiF`W>x4?=g(*D`e$hgbOmDPngMi z>WK_zqENF=*12-$vaF&;sW1v|2%B<9I1Jzf=g?p)Y&h6V;1I_{e0n6dLtd?b-San6 z=7NEav#mDoX|@@`_y0bd(XYZP368lAnqHY_xP(GeeGM^)wa2RvMlqk9 zuMVo1h7i92z~G1(3ovBW*rRSHBSr>gx~hJ^P%!fxT|%L)zn}*BbWGE1;Rwxan#{Hm zeWKqR85m-6#3Z+QY&}8(&f2$Xs`VKNXuv2nF8#&yMK|9Bk_MdowtzWnVg*Sx;fG^E z@%*Q|z=^NfMycGP%vDr7HJH!#dbau^YzLZH9_?5On?t4Q#0K2!zq<55QFEq{kAg** z*+O*0sp$K)BNj_KATFeLT6V?z|FtI0^Bu24J zecCXSLJot)l+-l6;Y`;|JKc*{rqiIaZr(jtm8l9$tZ(eiQ0`!7V^J(rnNd%-FcgIwf3BvSzyJf% zTQ$bgha3Tq(Q8~I&TmRsQL^d?Z6eJj-Obm+7RKo>sgng)-arw8JJZ!3AsgC=K3SQH zP%!$_oPv>aqJF0q#B%Bz274AV)<99e)>OT2b~QmlSs?lk+o+J~RFLY=EkHSZQFPoe z-mF+za)x}Dp@3xyFW7vx1oa=byV}v7;&3rm{K>mIc=9AuH#?0^QZl7oR~w&g`%|3~ zt=@PEDGL43dGDI1?UI{+HU?U7%nn(O!J%O)5Nz>C4WP)Me_D3z+R9Xnv*E%!XIYoU zvoQ9Wmc|k`%R1!t>dBv-A2ycFQyfj1YJ4J{e^pczOgSReMWAx`!sLU+{Ka}N|5*hA zPs;|nAK%o!9#Ze2Z<0&iJ9}h@9$}B)2h2z*oiEtrqukOCcQ*lCT#7iBCBy5!)f5~FybKMYHcm74nlt~P@5(dO8XxyJ6b zuFR#gx9;r6MqI@Emb-_ZGP1nxSR}Cgc&O@j?e2Lhw;46E>88MlqDPf7PsTfDcm68b zqpykvjNS93Z_mmhsAY~}VhJN7pr?mk)@W{idAM*dSU6D^5uWaA>(YXgb1R%eb;6n%64#8+(}7UYzzq#}pU6jpo@hsWlatdrS(dc9~l+p}&N ze4!IbVh_?30A)Yp=S|+40x|njta`f>8|Js)y(cyqCrj_oMHP*MQn6+r@2b~{ksLHV zG3~5Trmc&M{@uwIX!w1c&9u+m)z(|FjsYz3qUlEIvXQ#z!!w+@K^J2dO1kVo^wchW zLm%)y60Ux%`o{&LdISc;iZRH?5^qy7W zp0cZ9C7;OD%i?)0|6DIDvD%}l<0*V(+)?f!=(^w)3Rf5<~1A4=^9&;Rc2d*Z~0Jo2GbhP(x(`_S|FC<^b}N|UTD zy#mAt1)YneTFc8U-UWyjaw^Y$4r@v_<;G_<=m-my3 zM1kM)@k6-(?;44P|4ZXEBmgi)3;q#KI zjfXIp47KXBGd@1v$Hbq=^?@F zuVgeex$;-o0R_s(zp9h(BVMpzs=0@Q38;ai@v}e1 zzJKd}2y}x1ql|CF=b8!5MVtfM0f4r}gVB!w{p6kQt)1vaMPqHn?epJLon->XCKsG1 ziK-wrx|hq6m!8=C!|il?3ixe&Z~-GGZS%o0m^u|p>!B@!=7M8IVr~ukG+7VPZ`aLl z=2d%Y@STtpaVckPh?%eYz+M~0Yt4=R7QLDUIy`p-X+j&F2k9QY`zJ{KAke^~74~*@ z2UcsDTU_R1(-~NYC2RYxmf+Gd<4^6VkCOXC+R$;-c#{5K8PGXFjZuoI@nv**Fq4A_ z4?hRMK@Bn$>VECeFDD19+4Ve)Yvx=RVj|0Njd@g0+XZ__-_dN$##63L3h4B{+WmEP z(=gJL!{_1r`vI#q?p088|F_}3#ex3&!`;@FAIRI&UD&chPu-pe=KIMJqrvOjNKZKy zWR*i+m*?BB12==$#|0I*&Z%uGIHFx-Cr7xPKpxg)xJFrj@n2`;p&)}=5nuORu!Sg0 zgzm)*^Bw4k12A==GQM_>n6}q~)Xc;o^h= zOdo2k6F@82y0~$4uIG&~pV!HmaR@?AiID0c`k<45EMn)7n4)yV)fp{=@tP+ulmqTR z{}i-k8$*tZwv-O}?MU@nUyG9b#%}B4h~!FDSAJ1|hDLo4NzR@Uj&0tFcmx`%KFma} zwabU7Ki9+qsLz{hL~ZoPnTVGH^(Jj2AFT<27oYg*O|m28=~c zYf7ZCr;8n0hh8DuU%=;wii#O*@K=J?%01L!VKIFo`@~U1uRrE){!Mvuv7{-fbxOp< zr%f9P%rZE|+QqArA$_NZW5q=I@PWW|R%c>#@NKvmN7z_49&!@dxYj|YMS0NCkEXCW z2{TLQpyaSbZbNaM%2OCWM+M}SBPC93*Vvkn=hCMlr!%--Yj1ytoi>ZX4DIC#i=B@% z4l#jOhHiGE{jk3Jach~)Yl# zY$n((+2t=Qlq5gXV?>DdRPpK6Po~Ft!~3)D-tsP_J|$Z7rekYhwDL*Oe;E9B{5@ zmIehniDS=&v|-&be^;>)xK{QXF2#9>p0(o4xj?TfRp!-t?CeXY#5@%#v<%SnJm95H z0dvCbzTNNi)u)xFsV3S=^IJP2gSFA?TlfA!PY;x)_My=f9_z%EeAO&n3si`7%>or` zN0WSk#_5t*kbcXmB_bi$wrCLTnRAQsDFVEEoQ zd&8Y-DLaq9HZ82(JFY*TH!2@0CUjnjx%X&=gNhxX-@7nplkw zfr*s#L-7f^uoTjEvZzX_p7>I-B#?c;Sk>{mx;!&5$0NyH z-^K_^@tjTmOM|+NuK1z3UA7&ZAn8baS_r7YGPwB5X{U$4pFf{Ly<2n|RSgLrz5?TT zx#J990*R-uZanbQ=trRGo_W7<#!>QL$1~Iky}0zI6fFKA7_^)qMiW}jeS)-eoDDdMzbR8AUIP08AXY0>Ux8_%xlv zlFRWW9M0wn#&S|kB+9TJBXDIg94_MHs4|X%`Ah7I{GyRvQ#bVD5bFyL`z`7Z4vL|e zIlO{e{0zZMfg6Ms1p80R^7Bj2Wd_}sO-fWP4|Ku&6;HGR+Vtvj-yqMB1+I_!fYheE z5ldH3tf&Kx0uiEf-d|8N=t|x^ona%?laA%Gh8z+&uWJG?(d)5CDlflHIio_nNeR5? z1p!&L<#&A}dN58-dtCoMD33T$h;VcG-6P z0f;1#URFd$tc4m+3@oz_k?APGBX1#as3cL4He&GQbk5+4I>=LAR0}$%sK~^0p%KDv zP7OZw>d)2%YMp%N)P>xxK{1jo zJmT|!u~0fXX%CBr-H*{d4Ax<}HD4i9MqaxxxNpB%7_cOF3Wu_-7SrDvvK97-f0qs% z7`G8ZCOLlY%zU0G5a`jRUy7RT>4bT+d+vQjYI{TaccaZ`Z}LwF$N&IzDFE=%;6J9@ zF0LN7W`B+~zx39e*ThkMwklqs(DS|$;p<5%)ze5zV|krvuPMoT-0*^jl9D2CgXWtZ zoVwWq!ffHg2^+D8nht29T5mJ@viW0HIY54MJ9o$Hv`pDV-i&9lhN-39-{V;iTEKhDdtG`hkI^ z#q|QC9;C<(jp-v3g~DFH0)M9;%rVah>!a`HV?_+>)N)oC#fk$gQNldbO&hMEoR~@^ zV5QTE2r3MsOc5Ld!8joSd+~k)M%g&gU`AGZgD8j=x_g5OA$!X>aKzdL#Ia#&0{cW( z_C^>=I8#oJ7!pwPsVTP8k&mBt>H0`w=|s;Ar7-z_q~mi~QnGC?2s^hz&und#YE73! z2%_raK##zp*S;8BlD(JZy<}-Kh7G->Q(T8(H{}Jp7SVO?ury1L3o{(Hv}v)*oXo7z z(MN4xWn&oOZ$-RCieuGTzHgXm4}#T&G!%By885XuG5E3C!$i$%`(Q|1Z3{L(^xml2 zzKsv|5}m3I5#2>9H63bywKmb~pMSO{P%!Yx_>t3~E+?jf1(w>AsY^3ZH0Oc3)!>{G z#rY`>JrL2&!<9ZZ&yQAvE`f(AJz&nSYDe3iv0R@)3%gRjPHRj|l?Rt-oRGW@zHp?} z0@#0)vY%KrKFL}LxZT424Pg2dm;>#DMi4Nh&i_p89V3yXY}<-m);u`Qc;RrvIU$zZ zh280Er!N0M>Wd|TqEJP!;DLX&b;$dwd}-PBw%jtFFcrz9Q@neB>H58sin@?vOSn7P zNyAnyWB)dZRdRo`(tV?q`P~8HtEr_;eV)rF1^&J{_TUqK*j-I+^rbm=lj`PJy|WN& z%hsAu@n`FwrUnQx_rTfXZd~R0O$ia$LoU3?1<7oQ#kX1<|8}^M0&&(@5nD_Au+&*j z#PWP6d}+ZwPJDuxy-O3;*a0~dPBYB*&ROT*1p{kqPHuT*FOF_`sDBh79|e@R!1~73 z29lc_=l#G2LD%cYAsgQ6Tx*DnkjeS_mT_|}^%AWyFs|dXm|^9BVrFMtUXS@bt}!9h z-k+o9@`GJ-!X>G0Ug4kurZR-JRH*L3{#;m68789M z-rPqlxi_r)$+2=!G%S6@dh{jW-EF4#rkcDl;bAqxsM7xFWno{xG)+yxK(5GlIQPkhx+5!TP^Khxs-!lIAgRPmZgpeRahD#kJ&T0r5 zW4YewryP^@ae(9FfIVc@d+WxKS)p(z-zn#ben*Eqvj;-4o4}_lpNfk5t_0aB)>bpF zj+qr_!qJMaEZ==5e|)X3|4H3i#90cIRK4*$Agxa&D6M=0YYz~osK$+x)Mq11=(X;A zLw%+y8FLQ&9s4OS<9mr&ktK#Q<(#}`{XE+0o3)9VACeB-wp})sDYIoL#pSkwGdJz= zw{Y)M{ofZqa4UPhHy5E=-NTa$F&;o@WHP;+TaCFSr1IU0duCBc{T?Ob9+b+kSY5Uq zBche&mU^2q=CTm`?p%xzeqQ=?avrr!rt_OWe=T^38o|LqpI$Q?+?6HV&5cL^+-*g< zB*WRz5>QiLKZ$9YUV{1%xAkzF$4bzNF6He`qldO!m4l>jZWdAp}pPGiR4hwa?Pg|8mec>elOzf1r zgxt4?>T69yRo{}{vKQF^rN<1=qU(koxn2vF&oCuCLz?lb{1!sPqJEJh|OZSNwRHiIB637_EhStrRc_eQB*~>5>0WG?CgB{ zl~S|rtIwEK6asJ0`|bLL`DYTtk77-n*CM?=3W+9uoOhpym)0MJJ^1QFCVA+cwHOVD z@z7TW`wZB+1<9H4v&W6HX8P0qJrrwlGHI}jRn=i{+qTNW zvXi7}g%|N?0@N?avDiP`?>eoSsP2pRjgH04YiZbrvj_Mb8kl&PkATYo^?mNa!=J5D zH>jQ8+Q(fteD?xX?!($QqaswaFY{3>e!TVr#gm=lOpYA)LVx1Btt01v#p4yKkUM7< z2`AzeyVyF5xFRasL%V(~DuC-Y2L5ifggEml#1OrTQ0FrTu@9eK0O0`FE<*;TUnFFK z-~g8{>s^;ALx!MNg$}@^2^NIIkneRA4*AVlAnZj{AdLRUuThcYKOp~T19Y*!V7*7B z5fy^N5Y7*QC7BggyBQ+6bdv|m0>e0fIWXZM^*W5}=|S!7cI@Sj-2Skn}-r?t=vC30BbxUQj2 za!c6FEjNRg_9^Misrh8M2%}j&;;AZHC_gv<#U{ zT$0E4d8|Es+vAKmeUNAuD$H$%iPt|+%#h)s?a^rKD-adGW)hnD<60`o`5&ht? zF2qfvXH)th%l=2DGVKS|nB2cq{&^<(59FVfe_qC|Qu-ixjejPcraq+WMU>Y_q{rA( zPxhru<_)P#_cc{M-y%|35wH9D!%NDbdLAkIbDHtwKHhBANDApIq$XI|S8pOBm_OVtB(Z;gO`v35GMlWNcC93ZCjdMF&>v{*#-w9>r# zh;CVDxmx?oWU!S#b>yZqyvX`yXJ=|>Yzi2&IpN*;E8ymUa;{sO5(`|QQ&HyhB2(^w zXJ`-_{w{24(qei#+M4BcsRYR&shvV3$ExU#%;nd8YTV>ljKd^%o6jDP3tsB!#pD=J z*qpy`U4k( zT@@s5t;-CQt6`P+Ss81?P|U#g+-hd$_QUWf0KV?7xuvzLCq zg&((uT2cG-@BF-J-oNa;hZ@H|td8{2P5e@38aHn<^w9KGFKneVdcw@_vyuh!qOq>mX%F3CGI`b_Q zV8=NPGs(VF*RpiLw!izPzLjwPb6}?eS%f8l$M)W#D7Ae`Ig57ldrZDfh62B%&!A)1o)iL2g>^p?PY4^5h6=$C}Kq09JiBNeP-i-#E+zz^Fy zYC_VL`7iufI%K5HvR2mzwM`eGgozk>HZFC}!h@02NWKENq(?%_9lE^H5$;SB?LEQV zKWT9!rCsO7#Kx#mC`#mi3m0FNCgsx#FrZF{OEOvX^#n8Ia&2wNMKBDvQhCUUM&LYR zRYduutlYveH;-u7*x?%6`pA(~!ProZrw1db@qj@v`wYoqjttHo8@K5`bSBCVtDC7K zIMIYT4x!%k`|>$6Zg0lkp_CW-sv%5(&79U5%oJC$Q675R%v9fTb1PNj*xn{~TQ}-X zo#8XGRBi}ZOyO4d0K4QAy8?vAFf=REGx{Ku~U_646k8~wL@oujbeVwo6WW-GxxPHgw512mt@E|!5k_#uMt9r z1!j{u{8e^=lKKh0_F&GAwq{9=+cC0KjfAJlny89b*d3|EhIPMDonQM%I|j_TRCnb@ z_jFOq8j5*s6x3iYrX9JJ@iMDbbTK(GY7UDKdR^bX9^<{dFEUeI7O5|vd2#5yr^()d0R{ac;# z)8><;!^eUx{fAbG{txBA&CFQkFH&Xp!iIf6F*;!TnZhe9)T@}AO~!;x3o~tlPNta; z&|gjgS_-23MM0np2y{1bQ$d`)Ik@C7IB$~f?&FU5L|J4cjmm@{!kU%s?NbdINA8r1 z(}#{bItGHT#c+Oc_(gr-39J*zubrG~*evtt+imwa)t652sdZ#pCZl@6$_ymEj)KgU zvua$&MT{F~`jlHJZutcqD;%$=Krs?7Ua~DKrtYy)OjYE$R%P|`Z@?ByHf<%X{cT;% zICTjH7OUw=OJ@kHF?w#g7bd!rs%-bPvt6sKEO99m z(wcn{6LRQ<#t9Gc76-PoB`#a}svX(cl>YZsD930Rbss@ON^*xD6A|SK%O)cSEOP8O zBYqFxI3hwMZ3?d)LG1?emGDGSY2n-?&ywC z;|XQwE8|CbxIGh}lVIUOHpMYq`Nl(BZ4*5uo(TxR@9%083$mFkpR#oQ8~3LJ>0hC< zj@8kXG;`;$t?ppb5yAGc{Bf|tjG{e8BVT-_`#cbD@&a- zYpCI+Ac^a{kKsZLTKJmK>^#<}I+6q%18#}dblb^&>~&~rSs)j%Zfp#N*G}$!O*M7o z_D!u`Fm}0B<+?$b$LvLICjm{M;rUCcxAQLdLi0Nu(dIYI_!o4!>j`V~w{1yz3s>3F zpX2CzSHu#!`-o2aY}nWBu{n*g-Vz7){65`|`)?PqjoGIJ5lIQ1l4Te?Z*gez5VcPT z|4tZWUpbqx|LEQ~9~Ol25xwydvZ3hg;ON3=;^6$J@qA?T{9nk%N4LsO?E53RO!VT9 zcf9;cQSj* zCK8gNSQ>qsKmF`sAiswa!Rt)#VZ<6ehQgb%ABU7tTh4^InO(Q77!Vhj^t-&)q+Y0z zx>(;ff4>_GnqV-+iDG27ovlgN_7T@Iua1Nsq-}rH$_{&NIw6~`_$S@jG)7DSIyx+s zP*JHAV*wUsU&`CLV9^VSOo#-W+g!tsIj9=$+KLK&Hp|&sKVgBR2C(%-7l_tEZJ0}>p!Sl+d9J|f%(4fV6}eEs0r_| z&pkV(l(mS{CX%=&;^^!QH=ii{rZkfgnn~p?ZLO5sW+O%I$}%cx=&z>7W-#rH8wxd< z);~4E0yBu9Z*s4mXkXJ*xmJ#&91f<@h{8~H97KiR*6lJQoXa8D^A+34Wy^K;A^JqX zY?Ms)HF}>wH}w<~7$e~fePAlZ!Aa04MlCcNxN1=VEt`1sVug;6NvVBe8pXfNYB*1! z{bS+)Z9no~;JMGZN%k-anInFG-hw2mU?(?4QtvkBF;3Bdz`#nfG`2zXtUE6ASn}6Wzr+8&?)6W&IN3ko|FsJCckth* zzW)UKv-~IcZxi9aGyHvS_D=>9w*O@Kmzmn%@&D?b|B3#1{Tu+mfAiIUhyUxi`BykN e_g~=uIf^REKzx|TA1S?XfWD8BwuSePz5fSH9#k>_ literal 0 HcmV?d00001 diff --git a/TestFiles/UM-Whitespace-test.docx b/TestFiles/UM-Whitespace-test.docx new file mode 100644 index 0000000000000000000000000000000000000000..c72ae3f185495cf3564d4e04f95e72e8ae0dae76 GIT binary patch literal 13319 zcmbt*b9`Of)^=>$Xl&cI?Z&pd?AbTd62R0sz}VOd|adXE$g7K#-fa7C%Ai<3}xe=@CTk0$&5BSr;W+$P4<0bEA#1 z%^m?unPr8ICCo^xI?vf9krwLRw!GV4!%%_$_p+^HKFb zedFjv#*DoZZQjg5!RBYc)6s)1N`pwQUN)rI6TZ}y{Nh-Qibm+aFNvyH5u!IMEn2`g z2eTv7=}YB&0g~xp!QE$e50RkBK9V{A+)N^|DQ^UCZ5XElg_>8ZiMX&klcq=UK%4V^ zw*h>g)gT)CBX{zY@+;ipq3(lOEXyYu?X$0Y4QqkKpYh1wc+8^;e!$_aCy7uakV zKw(3?_Ufr_6Ern8j<@)i*}JZl>N?gg`TP8(08Y}m5LGWOT^Pdf_c91j_hsjT1Fj8E zD!2$k*btWYH;tky#;e88((@5w*f?JdPtFspzi`0g5XSJlE}SASJ?=bYH^agt11~vv zc1Csp$iEo~>diO+0Oby3+CMM;{D6EjkddvSyo0TsBi&E4C{5^^&%Me2Fv|~!zimv2 zQKe!0X0d)C004wvA@uF+{*!?-6lLUk>0#Q5F7Zp(8=Y`SncW4~8chpZ4}xhB%*ps$2G?93k-E~o4GA|~gBV{y`s2>D=`$?UkR2yZ2v<06r)c{fDm zKM2y0+dqWf6$GHGBhPRL18BvG_))s96GoP?>2}?W0_ik?&kR%BNrp00)*zf4)@m8Z z=zXvQz7wZOsX5f$^46h0A%TiOoxuGWh%Doxgub27ygQM!@cRt_EA8_@5>8gpzB{xm z?^ByAG+r?uf8_8gPG>GU;#WqlZhbLwN<}^>48W$mK?u^?*>Y#zxPJw1$OyiVxvKP~>Zxt^D6rOn>Vb^T^8#9z&2XzO77AMF*}ZPP=KAbg(i2p@AE z8b_dj5=c;wO7@9TV6K#HLAQ&fn#Pmx;?5ZfLd5RKdg0oo29qlbr<8Fqz*(`3K_WL9 zB)bVsv0`w?n*&yi*lsBPeSs=HshFLSA~)~Os*$#qTkY}(5lGq#*%9N_h+(j4qj=5^ zzp&;Z5-tl(t4@F0BSIe zh1VDE#YG^ULw&K)nc280565&-=goMX3#>#H*sylU`3K!zt*16OfP}!d0tUBvyDgZs zy<3x)5h4kSk$`Xw)UZh`u$cRhjD-ZDX}sA*)#gwGsC~uz#X-v!$`lx3tno90 zfz&8MsZ8t#@`A7QjtO-b)ah*8n_uvedtXLNT977`xr+m4DAVI2s!>nrUzt$UP$Y-I z$z(;B5w4NxSdcOT6;-BYk0z%q_OC~{2mwEv8lTEHgAd^I#vl*04(zt_>+6SBzVrnPX+PgGT8Y8uPC;;M;ISlE>oHdMNPIT z)d=WkFn%u%848{KX0kzXXa@VrWPaRv@l^s7a;K<#n9j86C4dN1tC~YAmr4p}l8j@E z$Tf^8I+8H@>7|uuL!nuh?wk&%`Jt8QDKx5Yf&daZ3#$NNVn5bPD{F|@wp<~ zW7MZ3u&*5@TxPvLt!{T8pY;9WgCEAbkCh}l|?$Gnm{vGkIqq|%D+ zOUYC4KtAl)us!~919XcvmAgs5uMmImBon;ZVQD6OwaZbJdX-)7Ohu1dY!&?S#YE9H zVzOfrFC%+zcswM4!E^BuwMiZi9t_8!Yrm_)B!s^wIgerXC25oA6a5~jt#L$8dYA7d zn$`6dj|iP?pTuO;y@2n_{T=Z4JvN^M!v}R<=7B4)yVNEx$AP_NCl^c17jRDxu6@4z zblxkk2FTSZ*p8M#q;H8NkFaEqtImC4Vh|HCw#k;ZG1pm2@2QK=v$iny&w=`967 z7M&qhX0Xd!6sLc3r7JUQ{<@=1;(Vy4zN8q4Aq9d2RTb4t(=TM~l9Eu{&YzFHe+ zH6~SaN3Unf636R+w^Vh`S3pt<9F?!(Jnz^gPav30Uz)vjmDVb5jt-sAzYZ&4zs+ED zcff6g`wEXcS)1;cd&2NNe{{;+{@#Km*WBFq`uq36%Q6kn=JeyYiM_1|;I4damNrT1cSEp7pK^F!>cLKQCQv-fgio9mp4w72M7P~yFvUCi8ve@lU`63FW z^**YbKHZE%^xx^XZz#7{Qdlp-rtVaQEx`b_7+U(BA6PR$tb+h4t8F4B{p?Cl|0 z59UQEl2um%1{CM&Tm=1CLog?W$~GCAp3KuJV5*go62U6DGYQo;Tcvx=G<=K@>R35zB4neWV666 zR$xoH9K3m^I$dPf`5G0o4;^)C>xyrkMoXtdY!%_<&3bze$cYoDYy*B&k73kivlcJn z>hq>E$qpUTY()6GDGD%G$u1a!ly0M*sWD!_hmadjX{4r17 zca^vFm6x;Al+U6!6azXRk%)`N)Z zF|B{sl}~NHsv2$(K_Ypy-$5`No3Q&>-go0{QRk8mzmfY$oyWOD4UuP|s?TXgOCEyR zP3S9-d|wOIDG)9oNIivhgNahautLUq#=7$y#9H~ay3K>kG~Q{-Qy#4~F&RGgCEL6q z@g9E4Po7vyiW*}0<5axns5Zozu()Q0vF^tsv3wOxU!j8Cr3!QqY)7(qq(jeK876Y( zhGS5`K~c8A%g4-+SyHyh7Y~&wfhs{wK#%rfWZdv?7W7bQa>rXY7-TaY{fjD8EaT== z{C!~;<8I>-ZAKJk9kc}ZZW;(cpKHGuh81#yes1lNR{8`^vNOZzx4Eq%2-!usgNM$u zPlfaW3EN5$AV3np^B64Lu+au0XBBn5L*O;KM#*#)_NuWjq+Nxa%WPon^L-|4&H4BT z5?DrPtoSno?w09!=l1*9&#$tv>r;eGq&s(6$-1uO2G*Jy&>zBglKpi_(1l>Pby#AO zm}6%MFGf&IUt=E$)m~#_YvNS(b#sNPk@jTCIZ$RwX(`I|+f>M}VnvT!Ai`(r#LQnl zsZPh%%;@?-hZ_ZqDbE?Q7aKCa)9hWtGmFjf+K7kSZErQ#%4{np%}LG-0s;Ua1@lXSb2N5xGPf~x{42rzZyN+^*0w9G2(P^FzY9$5 zbIg}zKsiH=Rwkiba(O+lgvhtLCw4LgQ=I_GMiQbex`(b|bC_B_kvTE~r^S#i~4 zcgzf#Q4R|{)^o2E5~gK9eF}Vf&}V9P!L|O>6(S*kb{1oHw>DnY*~UYhiUDk~3<#Bj zoJqGjlT4W+>#U0u^(~=11mN?hFe2b83%*@(p7}ZAq&C4IlXwNkbg)b|M^CI~J4=$- zOoJZt%KXGYb7>?E6lSuQUd`g9`%~$8#wrfP!W(v#j{-1?2eGQlHJA9tz@O`g`j344eW4uh&UfQfHq#xv?%HC+Fe+pxH}>U z+(hX|hm%IxD?Otq)T7+VJ0eV7wsPG1t_`~*And#mMG%(RQ^gE%2~kHd zJ_1*q^1E_cJIOBKjrSgMPE)@5&K2+V(Dy2^;+B3Kg}D&jQx zITWG;9RM+MKU)4g*ieKOBn4~co8-i@q!mU*gEB?Q8AuRkXDX8>im)GW7|fF3lROE$ zb*3$3kKKMp?1pAsCqHJC);)mrL$ahHaVL)|$lzK;qdj<{ z)vi^t{CqaJ&Cg!=5_DLC2ft?YLtRQM>{zAI1(@f>lnlm0g`0v5U}F-sa@KAd%>bqn z4Y10KLc3+)DCty4Ojj+l^`Q2Jje#zJyT4(nFAE+2Ch5nD!v--Kygs5ZnuhIe$ApS z&7K%!;)9Okv`7Xw#H^)l^9fuEexq8v`5<8aEi#2-KI9pLC6;TUZ&l)T3g7i!oJz~G z^fSFAnQh{_#THG~!Xas7+wt8i-wVfQUqH6+KiA5CZ*P#UKZ={YZEw)Lg%p&3EWcch z4V3=Nh>|g@VB1500I+#S@&y0UwSbdF%8*3^EvcPas^JCTV;Kq10zhSp9-n4K*=7G( z=*(@$D(;|HGh3=xz%`V$s95huq|ZPFHE9t(-j!g{@%LR7J6JSE-vBac(eGTZDJ$N; z0221`@q~umlptc#2$Cxew2^n&@%15DJtyZearafM(D$ynsKJ4x#)8+YY6b#hlMP2? zVz`F;yH|p6=PhD>&IvVzb3&PZ6in!tE7hul%Am%Dy5OBR{5H*`y~M*zhVGl7cl`y)$39#6M0nuRd5bPG2McwFnB2*aFp=VVY-qT8$K?Gb z#P*-Xf#bG^=b<^^pv_kVmo3C+omxWf&_}Z)9{o^T$bu8V2aguCEz8$8bhmJ9qnJu`3 zvJ)w!CD>dG3kZ-JW>D}tOtv)>z`Mz!s?uWCxL%N-a z^JD0huZ^x#O0c58_?%SUr}p!Oz{2lN6okxE$KdA;zNU)E9quJi&0{CtL-Sq4CB0sp zSS^n-C}4-F(|RXuK(Ogt(@)>Ngn-AA_qgraZu#G?2HjG>vqgqX8m#6%bpCWXKL#%~ z_8x}cawAfhI&m2O%IJz{hTNQ&eb%=kfabkFFbv@PqpK?R0dCW81;z>6p6nBKJNLSQ zhOYFeAChtrdKK((j(kVjdcxZKw7%K}ih3K5A~hIXS|l~W*!9^5CUdA0tQJwmZF5>H z0_n|5_SSn|o-v>xYJtQOx6i~x?V5d#qbPgbH=5W&S=&M}-9C^#e<2oHKt6-jt6>&5 zxM64szlwe!d+BU^CvC9*;NM?)&NsB35LxNH^m_Uh{dk&#YuTH_yj5Er3to%Ywg)7P zuPryOJ-z3)kfQ~gpP=J#`NmlLeG{<<`V)`ce6_lI1`V*`^}IW(@U)F*1zDY!45!PU zsI1vB^gv#Tq-MnT7;YqxYn;&C7p|Tk7Kn}g9v%43lV5#Yt#A1b^H%8>7Qp)xA(J$$ z>3NjI5bh~M;Y_c$dW)3Jhi8O$BCW1OOBZ?NrAfI`9?xoFsPC?JJr6BcTjh45Ml)@s zY@VObXs<+wYfp^NpSd^OCKp~S&fU4oy%@9~@V=2Y=$w%`$n56 zor(yYv_BkOMUJ#O*bc=P;wK1XcVs-S4Aeh-hgu{^s+jQUPieVl5Ag2ez+zBFv){Ek zPB~m}hM$6sjGCLeSBI5(`}(F1JO1|dtPY#udmQvU6}J#0;LY^GBg8O7lz9-r$}Ea7 zaSBJMIGZC#oZk^9Dryho8DpHGiNZe)Vqqr`)f6AItv$u_9!r0a$4lb#HeLtNqCGsB z7uVU>XPZHu^oi4^;MzvqJG+_z$J(64YuO-;WT&W_0t5+iq_biHTT?7(5-GJEW^U){ zZNy;-r1v#riFzSiv)YAx__v=V8jsy`p~S?fGYY;JOJ+`hPxR7pBVeD=PsVWzzV9gh0^Dq0R~8B;GJ*JFn*+qZ6;9~ym7;sOPD61xKUO23}aOv~GP z#;?I!jUM5tK52x0yKo@xRw%`;WH!IQHSQ(SNaS%QZATR#$ZQr9Ym<8y^PtJH6Wlvb z_}!#kqk1tKF!!aP!|k!>p^G&`?tu2ak9ginnwsSvINTb<*6r6`&wT+a<&+Z6Ah3H) z;!yQChU=6r-BBVr+(ILdaDLw~r+CnVn=&v;h z)K_L^u85PFViN3#mzfo8fGSqQZTHmI@ZDzO*RP}R^v0nTK<9uV%NeaPio*GtR(R&T zQQ(HhrmDvEK1P4YnFts3(1J&j2#DI? zCW@fi!*@A2R;KKfF~0lIH9|_%Birwi5sd{1K&DrhZ~gOVT(fkzj0$}rXC4sFfcCBu zAh_``CrYYyJ0^V+t9|cJZ4=w>@YO;cJ1I?{%90P41V(e;Z!mDJzSqf;3yy3|;pWpp z2Z!>NX27jy!vosB)Zac8HyCu7nL%pc;IV4PB{^50(A4Z<-KU`u>#Cu^vr+%1d<;=b z`klg(JK%a8%XSQXpnjZ{L(yDg-@lfWZW$7Ff5dri4X9YQHJM?h5gMA;DCg;6p8dS(886Kr4-Hvc3x$W&*7n!&uN#*77Tk6sNY)r zsDk=V-VbW50ti)c%J1hdK{#d0a)MFT4T4hEH9{(jSHFn>_#ajOt|ACRDeu>;@+;yO z^FjW=+om^R^@F1LA#ZxWq5ZBVC?$O()Zd7y&rF>n<{o1{AF^(kd8?!EGvr+4vr74P z*HeV7qbZ|C#w@;TeOwUa=BVf6#)(`OdKTG)<`J=Os?gn?m}2AgZMS(1lnY;QtK&&{ zZbW#@r1%iMqUq0(U>`TCMk5fzW^?telmx>$Dy_zL4oMqqP)0&+(D+h5tPKZnyO3Fl z0S{v=t;V?@$HiVLsuH6I&t_x{RcNtW=Ap@e-Co&Yy)m=yq{#fii-Lf;*)Wiv2|0BY z$-+2&#g-;DWlEKYDW@XB1V~bj2#l^AGf;Igq9zV22vgPKiJt}ev!alH!WRHTIu>CK zDOd#;#ooS<1T3V_iYsKMOx=dDkm5 z48rzM1~oxD^dlstQw5>TDfTbdXqF;Smyr}F=+rBtj#p}ymYS;3s2JCkC4R_snNzIz z{Ntm?WiD33Lc3m>tlm3&Sw+fOal4cx?@N_i{x9;*jtkV&Jz1M#f%^AWRIfIjavVg_J7ul{R{jjOiIB|@Lys6$8Ye8 zBWG=*%HLU{k-)7zZP{qFNvhu^A^X4);e~HP-{CAPc}H`OzFtpUsfAS}y^G(FGs-Gq z%U1KL+%dNOFz)vle%|JgLTOwc)iMh=wMe zIaj5xvz_C?U@-2oi_m|Yn{tdAUem6nM$_Tg;Y&M-u27B^HtLd=-l_^tidq8&#W4C0;`P!H^Cq!|?5zvUK z-%71xU)$V5z1GCOV9?Euy^o2NVJd#e-HSF+3p;w^$tCa*xn?;v(+;*#)m~$b zyJl(RRE1dJ8QFn+hsD~uKuE?JR0gv@rSev$V{GM=z)(;YA-Afsry_@m&13w{n!A7U z)e|@FUPdfB8H2|_@DMTXyq9R8D20pjvFpnLu#zNxa%K~JL>cdUC%@t8xSk#Dmlaot z`lFe7Ts*J()bqLcE^HG~+hhfg-jmjgphHBYo|u~~?3*m5BR)Qb7kCO@E{|t;(<43X z7hCdykbTjYNs1f3?`dzPf_<}8XCo6PHm?RQz1*4|U5XDu1nq4z9dsC~`iuwcW<_}q zO~@>1D|TZZ4UP6d^uC9!!_%6cnl%uNCA;|Ng1csuBT^$oHyt&)m;IJ_Ao%LH*3}HY zESsamD_rbp3nzL;Qc@VFMBeZ{5C`XY(w*Fpj=rYhGJAYc3=@qdi0kj=e`_DJbWULvk>s?BcM>N) zL~vDkd__HDbPQE?`%-)nBK~!{QSttq*F8kpa_dxKvtJ zCLwNCf4R6fUOf1)enK()r|sZV4Y{}NV3bbyMOOkR*PFpuYEWnB@74>X*3`rbWw zM)P&t-QP>=w0Wf?*@a*XGFxKbf{p@^haW(g+OIU#vRQ$EZ33APiJJv255U-OUlqvl zHA6HC`zwOvdkr+cUlIQ})F>S%R)c*7%`BFJgRPV(NSH>=Dh7im(N4jDJ9;&avNZ;P zDP6Ffw#^WQut=qhl_VPvAM%_8sp};C@lRsL(S$g|JJY${$NzK%-12d=nk-E3-Zu8xttc2jpKZLK|? z*!NMpPQ8WxcdBl_z~^wBG^W{`Qu2a!tV_aA6$BUP2P8()!zwh~-@MD_^t#l@Ymsyu zAEdwZYeK=3V@vWMYLnZ6^-+r%Vn}F{p~naC?s51r0q@Y~X}f4eS&w!((wII_(7-h! zl#5TczU!D)xd7xgU`My!PW@t`OhA{(SpDvKSN^V>~j>+dw)7jwf6sFirKJmj=hAVTe8#C%0d{8R{ zhx)qqQ3iA#po**nXf-F^iDI_$(1Ss34r-QqUT&gLWz`x2-r4bPzKuJ1kMXQp)|gHJ zYFtaW1!sU<2nfZ{$^Vn+3En`Mj;mcCd66ODnf_Fux@9a+8{IcSHG9ez##zMpS5>Q8 zM|3BO>5}t2AykSxFk;Sx5OR|?pdz3`%>jB6l^s@;(5RiW2$8Vv=1-YXh&k`+1=Ej> zY-&;j?>OV#rH*fL8KqQ+k==5IY=k7shIli+Atmo=POwXrE>7MH2a$s`7Ei@yAHh*9 z&%o1{Ip66-^sStpaxual0_S6-Fs>!?E3ZW}F$Rejwe>2J^t)ipi;B5oQRTEpBKV2f zeMD%wPJxqiEG6@%iP6}loLSn#QZ}qNGYuKlEFrgHj_TKjT+%_A#p30vt?y$B(16m& z*uzFy%wh`57i#5y5R~!qIDI>FwE4onDv?Y;;EXy6gx#TolS~+8&aJ+zHJg6`> zGT^JXz9kTN{IC>&VRbxHfs%405_YN_CR6WV=sa7w`o0pChhkz~9`J=NVG_K|eWGKz zM2h^FMHbhlz2;DXwFloQmb1TEHd*j>j_ci|cTIJM=)OLsh7jcI>wtW$No915e> zV@b(E!qV1fAo-IH1Cd!;QUQFDr!RAShH)N>rY`etsF${M(WU{|sA*U6Jx;h0n_qMK z0Ig}rgP`-6fNV2$R)pr7@g<9l>w%elv}x9=wIm?9>PuqdNm4;-^4(*_oXCNq_fnX9 z!Ur`<9-hzb=aFSIEnrrO^K<@P>1n036OK=%Q3mqgr@ zVv9FynQ+k@Zhi4PWW&uj zD>Hr>{atEmU3tK`Vw0DzE!WNYIbdR9^Ep@VXH1tD(Zv%OY@1GO=AO(A>u)c*Wqc=h zZ3H;H^j-RpE?nc8$MF6^ws#(|3yn~Z0*hwNQM2}18Sgh;@Tje5)O@bI4c2293Vi-D2AQb%|^WVlXS80 z!N<)bDqxn|4$*esP2wA+GiGxegjrV**c@vi%wb$0HQ1)EZ^p4b0peyAf5LC_6xegZ zAt{fdsqvRcCV4kB`!1YA zuex3T1L}V;KmVPIf0*T;Nq@iM3PS#=;;$oIMfnZ(2L^nN9oS?*9P+@#A{{ literal 0 HcmV?d00001 From aa10c34fe4bb15de91f9ca402cf70a66e29cf99e Mon Sep 17 00:00:00 2001 From: Lowell Stewart Date: Wed, 21 Jan 2026 20:13:19 -0700 Subject: [PATCH 3/3] fixed case sensitivity issue causing test to fail on Linux (but not Mac or Windows) --- OpenXmlPowerTools.Tests/UnicodeMapperTests.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/OpenXmlPowerTools.Tests/UnicodeMapperTests.cs b/OpenXmlPowerTools.Tests/UnicodeMapperTests.cs index 0d2e1e9f..0e129975 100644 --- a/OpenXmlPowerTools.Tests/UnicodeMapperTests.cs +++ b/OpenXmlPowerTools.Tests/UnicodeMapperTests.cs @@ -206,14 +206,14 @@ public void TreatsXmlSpaceLikeWord() { var sourceDir = new System.IO.DirectoryInfo("../../../../TestFiles/"); // Test document: crafted to include many whitespace patterns that Word accepts as valid input - var testDoc = new System.IO.FileInfo(System.IO.Path.Combine(sourceDir.FullName, "UM-whitespace-test.docx")); + var testDoc = new System.IO.FileInfo(System.IO.Path.Combine(sourceDir.FullName, "UM-Whitespace-test.docx")); var testWmlDoc = new WmlDocument(testDoc.FullName); var testParagraphs = testWmlDoc.MainDocumentPart .Element(W.body) .Elements(W.p).ToList(); // Canonical document: the same test document after being opened and saved by Word, - // representing Word’s own normalized interpretation of that whitespace - var expectedDoc = new System.IO.FileInfo(System.IO.Path.Combine(sourceDir.FullName, "UM-whitespace-Word-saved.docx")); + // representing Word's own normalized interpretation of that whitespace + var expectedDoc = new System.IO.FileInfo(System.IO.Path.Combine(sourceDir.FullName, "UM-Whitespace-Word-saved.docx")); var expectedWmlDoc = new WmlDocument(expectedDoc.FullName); var expectedParagraphs = expectedWmlDoc.MainDocumentPart .Element(W.body)