xmlseq.go 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877
  1. // Copyright 2012-2016, 2019 Charles Banning. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file
  4. // xmlseq.go - version of xml.go with sequence # injection on Decoding and sorting on Encoding.
  5. // Also, handles comments, directives and process instructions.
  6. package mxj
  7. import (
  8. "bytes"
  9. "encoding/xml"
  10. "errors"
  11. "fmt"
  12. "io"
  13. "sort"
  14. "strings"
  15. )
  // MapSeq is like Map but contains sequencing indices that allow the original order of the
// XML elements to be recovered when the map[string]interface{} is marshaled. Element
// attributes are stored as a
// map["#attr"]map[<attr_key>]map[string]interface{}{"#text":"<value>", "#seq":<attr_index>}
// value instead of denoting the keys with a prefix character. Comments, directives and
// process instructions are preserved as well.
type MapSeq map[string]interface{}

var (
	// NoRoot is returned by NewMapXmlSeq, etc., when a comment, directive or procinst element
	// is parsed in the XML data stream and the element is not contained in an XML object with
	// a root element.
	NoRoot = errors.New("no root key")

	// NO_ROOT is the older spelling of NoRoot, kept to maintain backwards compatibility.
	NO_ROOT = NoRoot
)
  26. // ------------------- NewMapXmlSeq & NewMapXmlSeqReader ... -------------------------
  27. // NewMapXmlSeq converts a XML doc into a MapSeq value with elements id'd with decoding sequence key represented
  28. // as map["#seq"]<int value>.
  29. // If the optional argument 'cast' is 'true', then values will be converted to boolean or float64 if possible.
  30. // NOTE: "#seq" key/value pairs are removed on encoding with msv.Xml() / msv.XmlIndent().
  31. // • attributes are a map - map["#attr"]map["attr_key"]map[string]interface{}{"#text":<aval>, "#seq":<num>}
  32. // • all simple elements are decoded as map["#text"]interface{} with a "#seq" k:v pair, as well.
  33. // • lists always decode as map["list_tag"][]map[string]interface{} where the array elements are maps that
  34. // include a "#seq" k:v pair based on sequence they are decoded. Thus, XML like:
  35. // <doc>
  36. // <ltag>value 1</ltag>
  37. // <newtag>value 2</newtag>
  38. // <ltag>value 3</ltag>
  39. // </doc>
  40. // is decoded as:
  41. // doc :
  42. // ltag :[[]interface{}]
  43. // [item: 0]
  44. // #seq :[int] 0
  45. // #text :[string] value 1
  46. // [item: 1]
  47. // #seq :[int] 2
  48. // #text :[string] value 3
  49. // newtag :
  50. // #seq :[int] 1
  51. // #text :[string] value 2
  52. // It will encode in proper sequence even though the MapSeq representation merges all "ltag" elements in an array.
  53. // • comments - "<!--comment-->" - are decoded as map["#comment"]map["#text"]"cmnt_text" with a "#seq" k:v pair.
  54. // • directives - "<!text>" - are decoded as map["#directive"]map[#text"]"directive_text" with a "#seq" k:v pair.
  55. // • process instructions - "<?instr?>" - are decoded as map["#procinst"]interface{} where the #procinst value
  56. // is of map[string]interface{} type with the following keys: #target, #inst, and #seq.
  57. // • comments, directives, and procinsts that are NOT part of a document with a root key will be returned as
  58. // map[string]interface{} and the error value 'NoRoot'.
  59. // • note: "<![CDATA[" syntax is lost in xml.Decode parser - and is not handled here, either.
  60. // and: "\r\n" is converted to "\n"
  61. //
  62. // NOTES:
  63. // 1. The 'xmlVal' will be parsed looking for an xml.StartElement, xml.Comment, etc., so BOM and other
  64. // extraneous xml.CharData will be ignored unless io.EOF is reached first.
  65. // 2. CoerceKeysToLower() is NOT recognized, since the intent here is to eventually call m.XmlSeq() to
  66. // re-encode the message in its original structure.
  67. // 3. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case.
  68. //
  69. // NAME SPACES:
  70. // 1. Keys in the MapSeq value that are parsed from a <name space prefix>:<local name> tag preserve the
  71. // "<prefix>:" notation rather than stripping it as with NewMapXml().
  72. // 2. Attribute keys for name space prefix declarations preserve "xmlns:<prefix>" notation.
  73. //
  74. // ERRORS:
  75. // 1. If a NoRoot error, "no root key," is returned, check the initial map key for a "#comment",
  76. // "#directive" or #procinst" key.
  77. func NewMapXmlSeq(xmlVal []byte, cast ...bool) (MapSeq, error) {
  78. var r bool
  79. if len(cast) == 1 {
  80. r = cast[0]
  81. }
  82. return xmlSeqToMap(xmlVal, r)
  83. }
  84. // NewMpaXmlSeqReader returns next XML doc from an io.Reader as a MapSeq value.
  85. // NOTES:
  86. // 1. The 'xmlReader' will be parsed looking for an xml.StartElement, xml.Comment, etc., so BOM and other
  87. // extraneous xml.CharData will be ignored unless io.EOF is reached first.
  88. // 2. CoerceKeysToLower() is NOT recognized, since the intent here is to eventually call m.XmlSeq() to
  89. // re-encode the message in its original structure.
  90. // 3. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case.
  91. //
  92. // ERRORS:
  93. // 1. If a NoRoot error, "no root key," is returned, check the initial map key for a "#comment",
  94. // "#directive" or #procinst" key.
  95. func NewMapXmlSeqReader(xmlReader io.Reader, cast ...bool) (MapSeq, error) {
  96. var r bool
  97. if len(cast) == 1 {
  98. r = cast[0]
  99. }
  100. // We need to put an *os.File reader in a ByteReader or the xml.NewDecoder
  101. // will wrap it in a bufio.Reader and seek on the file beyond where the
  102. // xml.Decoder parses!
  103. if _, ok := xmlReader.(io.ByteReader); !ok {
  104. xmlReader = myByteReader(xmlReader) // see code at EOF
  105. }
  106. // build the map
  107. return xmlSeqReaderToMap(xmlReader, r)
  108. }
  109. // NewMapXmlSeqReaderRaw returns the next XML doc from an io.Reader as a MapSeq value.
  110. // Returns MapSeq value, slice with the raw XML, and any error.
  111. // NOTES:
  112. // 1. Due to the implementation of xml.Decoder, the raw XML off the reader is buffered to []byte
  113. // using a ByteReader. If the io.Reader is an os.File, there may be significant performance impact.
  114. // See the examples - getmetrics1.go through getmetrics4.go - for comparative use cases on a large
  115. // data set. If the io.Reader is wrapping a []byte value in-memory, however, such as http.Request.Body
  116. // you CAN use it to efficiently unmarshal a XML doc and retrieve the raw XML in a single call.
  117. // 2. The 'raw' return value may be larger than the XML text value.
  118. // 3. The 'xmlReader' will be parsed looking for an xml.StartElement, xml.Comment, etc., so BOM and other
  119. // extraneous xml.CharData will be ignored unless io.EOF is reached first.
  120. // 4. CoerceKeysToLower() is NOT recognized, since the intent here is to eventually call m.XmlSeq() to
  121. // re-encode the message in its original structure.
  122. // 5. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case.
  123. //
  124. // ERRORS:
  125. // 1. If a NoRoot error, "no root key," is returned, check if the initial map key is "#comment",
  126. // "#directive" or #procinst" key.
  127. func NewMapXmlSeqReaderRaw(xmlReader io.Reader, cast ...bool) (MapSeq, []byte, error) {
  128. var r bool
  129. if len(cast) == 1 {
  130. r = cast[0]
  131. }
  132. // create TeeReader so we can retrieve raw XML
  133. buf := make([]byte, 0)
  134. wb := bytes.NewBuffer(buf)
  135. trdr := myTeeReader(xmlReader, wb)
  136. m, err := xmlSeqReaderToMap(trdr, r)
  137. // retrieve the raw XML that was decoded
  138. b := wb.Bytes()
  139. // err may be NoRoot
  140. return m, b, err
  141. }
  142. // xmlSeqReaderToMap() - parse a XML io.Reader to a map[string]interface{} value
  143. func xmlSeqReaderToMap(rdr io.Reader, r bool) (map[string]interface{}, error) {
  144. // parse the Reader
  145. p := xml.NewDecoder(rdr)
  146. if CustomDecoder != nil {
  147. useCustomDecoder(p)
  148. } else {
  149. p.CharsetReader = XmlCharsetReader
  150. }
  151. return xmlSeqToMapParser("", nil, p, r)
  152. }
  153. // xmlSeqToMap - convert a XML doc into map[string]interface{} value
  154. func xmlSeqToMap(doc []byte, r bool) (map[string]interface{}, error) {
  155. b := bytes.NewReader(doc)
  156. p := xml.NewDecoder(b)
  157. if CustomDecoder != nil {
  158. useCustomDecoder(p)
  159. } else {
  160. p.CharsetReader = XmlCharsetReader
  161. }
  162. return xmlSeqToMapParser("", nil, p, r)
  163. }
  164. // ===================================== where the work happens =============================
  165. // xmlSeqToMapParser - load a 'clean' XML doc into a map[string]interface{} directly.
  166. // Add #seq tag value for each element decoded - to be used for Encoding later.
  167. func xmlSeqToMapParser(skey string, a []xml.Attr, p *xml.Decoder, r bool) (map[string]interface{}, error) {
  168. if snakeCaseKeys {
  169. skey = strings.Replace(skey, "-", "_", -1)
  170. }
  171. // NOTE: all attributes and sub-elements parsed into 'na', 'na' is returned as value for 'skey' in 'n'.
  172. var n, na map[string]interface{}
  173. var seq int // for including seq num when decoding
  174. // Allocate maps and load attributes, if any.
  175. // NOTE: on entry from NewMapXml(), etc., skey=="", and we fall through
  176. // to get StartElement then recurse with skey==xml.StartElement.Name.Local
  177. // where we begin allocating map[string]interface{} values 'n' and 'na'.
  178. if skey != "" {
  179. // 'n' only needs one slot - save call to runtime•hashGrow()
  180. // 'na' we don't know
  181. n = make(map[string]interface{}, 1)
  182. na = make(map[string]interface{})
  183. if len(a) > 0 {
  184. // xml.Attr is decoded into: map["#attr"]map[<attr_label>]interface{}
  185. // where interface{} is map[string]interface{}{"#text":<attr_val>, "#seq":<attr_seq>}
  186. aa := make(map[string]interface{}, len(a))
  187. for i, v := range a {
  188. if snakeCaseKeys {
  189. v.Name.Local = strings.Replace(v.Name.Local, "-", "_", -1)
  190. }
  191. if xmlEscapeCharsDecoder { // per issue#84
  192. v.Value = escapeChars(v.Value)
  193. }
  194. if len(v.Name.Space) > 0 {
  195. aa[v.Name.Space+`:`+v.Name.Local] = map[string]interface{}{"#text": cast(v.Value, r, ""), "#seq": i}
  196. } else {
  197. aa[v.Name.Local] = map[string]interface{}{"#text": cast(v.Value, r, ""), "#seq": i}
  198. }
  199. }
  200. na["#attr"] = aa
  201. }
  202. }
  203. // Return XMPP <stream:stream> message.
  204. if handleXMPPStreamTag && skey == "stream:stream" {
  205. n[skey] = na
  206. return n, nil
  207. }
  208. for {
  209. t, err := p.RawToken()
  210. if err != nil {
  211. if err != io.EOF {
  212. return nil, errors.New("xml.Decoder.Token() - " + err.Error())
  213. }
  214. return nil, err
  215. }
  216. switch t.(type) {
  217. case xml.StartElement:
  218. tt := t.(xml.StartElement)
  219. // First call to xmlSeqToMapParser() doesn't pass xml.StartElement - the map key.
  220. // So when the loop is first entered, the first token is the root tag along
  221. // with any attributes, which we process here.
  222. //
  223. // Subsequent calls to xmlSeqToMapParser() will pass in tag+attributes for
  224. // processing before getting the next token which is the element value,
  225. // which is done above.
  226. if skey == "" {
  227. if len(tt.Name.Space) > 0 {
  228. return xmlSeqToMapParser(tt.Name.Space+`:`+tt.Name.Local, tt.Attr, p, r)
  229. } else {
  230. return xmlSeqToMapParser(tt.Name.Local, tt.Attr, p, r)
  231. }
  232. }
  233. // If not initializing the map, parse the element.
  234. // len(nn) == 1, necessarily - it is just an 'n'.
  235. var nn map[string]interface{}
  236. if len(tt.Name.Space) > 0 {
  237. nn, err = xmlSeqToMapParser(tt.Name.Space+`:`+tt.Name.Local, tt.Attr, p, r)
  238. } else {
  239. nn, err = xmlSeqToMapParser(tt.Name.Local, tt.Attr, p, r)
  240. }
  241. if err != nil {
  242. return nil, err
  243. }
  244. // The nn map[string]interface{} value is a na[nn_key] value.
  245. // We need to see if nn_key already exists - means we're parsing a list.
  246. // This may require converting na[nn_key] value into []interface{} type.
  247. // First, extract the key:val for the map - it's a singleton.
  248. var key string
  249. var val interface{}
  250. for key, val = range nn {
  251. break
  252. }
  253. // add "#seq" k:v pair -
  254. // Sequence number included even in list elements - this should allow us
  255. // to properly resequence even something goofy like:
  256. // <list>item 1</list>
  257. // <subelement>item 2</subelement>
  258. // <list>item 3</list>
  259. // where all the "list" subelements are decoded into an array.
  260. switch val.(type) {
  261. case map[string]interface{}:
  262. val.(map[string]interface{})["#seq"] = seq
  263. seq++
  264. case interface{}: // a non-nil simple element: string, float64, bool
  265. v := map[string]interface{}{"#text": val, "#seq": seq}
  266. seq++
  267. val = v
  268. }
  269. // 'na' holding sub-elements of n.
  270. // See if 'key' already exists.
  271. // If 'key' exists, then this is a list, if not just add key:val to na.
  272. if v, ok := na[key]; ok {
  273. var a []interface{}
  274. switch v.(type) {
  275. case []interface{}:
  276. a = v.([]interface{})
  277. default: // anything else - note: v.(type) != nil
  278. a = []interface{}{v}
  279. }
  280. a = append(a, val)
  281. na[key] = a
  282. } else {
  283. na[key] = val // save it as a singleton
  284. }
  285. case xml.EndElement:
  286. if skey != "" {
  287. tt := t.(xml.EndElement)
  288. if snakeCaseKeys {
  289. tt.Name.Local = strings.Replace(tt.Name.Local, "-", "_", -1)
  290. }
  291. var name string
  292. if len(tt.Name.Space) > 0 {
  293. name = tt.Name.Space + `:` + tt.Name.Local
  294. } else {
  295. name = tt.Name.Local
  296. }
  297. if skey != name {
  298. return nil, fmt.Errorf("element %s not properly terminated, got %s at #%d",
  299. skey, name, p.InputOffset())
  300. }
  301. }
  302. // len(n) > 0 if this is a simple element w/o xml.Attrs - see xml.CharData case.
  303. if len(n) == 0 {
  304. // If len(na)==0 we have an empty element == "";
  305. // it has no xml.Attr nor xml.CharData.
  306. // Empty element content will be map["etag"]map["#text"]""
  307. // after #seq injection - map["etag"]map["#seq"]seq - after return.
  308. if len(na) > 0 {
  309. n[skey] = na
  310. } else {
  311. n[skey] = "" // empty element
  312. }
  313. }
  314. return n, nil
  315. case xml.CharData:
  316. // clean up possible noise
  317. tt := strings.Trim(string(t.(xml.CharData)), trimRunes)
  318. if xmlEscapeCharsDecoder { // issue#84
  319. tt = escapeChars(tt)
  320. }
  321. if skey == "" {
  322. // per Adrian (http://www.adrianlungu.com/) catch stray text
  323. // in decoder stream -
  324. // https://github.com/clbanning/mxj/pull/14#issuecomment-182816374
  325. // NOTE: CharSetReader must be set to non-UTF-8 CharSet or you'll get
  326. // a p.Token() decoding error when the BOM is UTF-16 or UTF-32.
  327. continue
  328. }
  329. if len(tt) > 0 {
  330. // every simple element is a #text and has #seq associated with it
  331. na["#text"] = cast(tt, r, "")
  332. na["#seq"] = seq
  333. seq++
  334. }
  335. case xml.Comment:
  336. if n == nil { // no root 'key'
  337. n = map[string]interface{}{"#comment": string(t.(xml.Comment))}
  338. return n, NoRoot
  339. }
  340. cm := make(map[string]interface{}, 2)
  341. cm["#text"] = string(t.(xml.Comment))
  342. cm["#seq"] = seq
  343. seq++
  344. na["#comment"] = cm
  345. case xml.Directive:
  346. if n == nil { // no root 'key'
  347. n = map[string]interface{}{"#directive": string(t.(xml.Directive))}
  348. return n, NoRoot
  349. }
  350. dm := make(map[string]interface{}, 2)
  351. dm["#text"] = string(t.(xml.Directive))
  352. dm["#seq"] = seq
  353. seq++
  354. na["#directive"] = dm
  355. case xml.ProcInst:
  356. if n == nil {
  357. na = map[string]interface{}{"#target": t.(xml.ProcInst).Target, "#inst": string(t.(xml.ProcInst).Inst)}
  358. n = map[string]interface{}{"#procinst": na}
  359. return n, NoRoot
  360. }
  361. pm := make(map[string]interface{}, 3)
  362. pm["#target"] = t.(xml.ProcInst).Target
  363. pm["#inst"] = string(t.(xml.ProcInst).Inst)
  364. pm["#seq"] = seq
  365. seq++
  366. na["#procinst"] = pm
  367. default:
  368. // noop - shouldn't ever get here, now, since we handle all token types
  369. }
  370. }
  371. }
  372. // ------------------ END: NewMapXml & NewMapXmlReader -------------------------
  373. // --------------------- mv.XmlSeq & mv.XmlSeqWriter -------------------------
  374. // Xml encodes a MapSeq as XML with elements sorted on #seq. The companion of NewMapXmlSeq().
  375. // The following rules apply.
  376. // - The "#seq" key value is used to seqence the subelements or attributes only.
  377. // - The "#attr" map key identifies the map of attribute map[string]interface{} values with "#text" key.
  378. // - The "#comment" map key identifies a comment in the value "#text" map entry - <!--comment-->.
  379. // - The "#directive" map key identifies a directive in the value "#text" map entry - <!directive>.
  380. // - The "#procinst" map key identifies a process instruction in the value "#target" and "#inst"
  381. // map entries - <?target inst?>.
  382. // - Value type encoding:
  383. // > string, bool, float64, int, int32, int64, float32: per "%v" formating
  384. // > []bool, []uint8: by casting to string
  385. // > structures, etc.: handed to xml.Marshal() - if there is an error, the element
  386. // value is "UNKNOWN"
  387. // - Elements with only attribute values or are null are terminated using "/>" unless XmlGoEmptyElemSystax() called.
  388. // - If len(mv) == 1 and no rootTag is provided, then the map key is used as the root tag, possible.
  389. // Thus, `{ "key":"value" }` encodes as "<key>value</key>".
  390. func (mv MapSeq) Xml(rootTag ...string) ([]byte, error) {
  391. m := map[string]interface{}(mv)
  392. var err error
  393. s := new(string)
  394. p := new(pretty) // just a stub
  395. if len(m) == 1 && len(rootTag) == 0 {
  396. for key, value := range m {
  397. // if it's an array, see if all values are map[string]interface{}
  398. // we force a new root tag if we'll end up with no key:value in the list
  399. // so: key:[string_val, bool:true] --> <doc><key>string_val</key><bool>true</bool></doc>
  400. switch value.(type) {
  401. case []interface{}:
  402. for _, v := range value.([]interface{}) {
  403. switch v.(type) {
  404. case map[string]interface{}: // noop
  405. default: // anything else
  406. err = mapToXmlSeqIndent(false, s, DefaultRootTag, m, p)
  407. goto done
  408. }
  409. }
  410. }
  411. err = mapToXmlSeqIndent(false, s, key, value, p)
  412. }
  413. } else if len(rootTag) == 1 {
  414. err = mapToXmlSeqIndent(false, s, rootTag[0], m, p)
  415. } else {
  416. err = mapToXmlSeqIndent(false, s, DefaultRootTag, m, p)
  417. }
  418. done:
  419. if xmlCheckIsValid {
  420. d := xml.NewDecoder(bytes.NewReader([]byte(*s)))
  421. for {
  422. _, err = d.Token()
  423. if err == io.EOF {
  424. err = nil
  425. break
  426. } else if err != nil {
  427. return nil, err
  428. }
  429. }
  430. }
  431. return []byte(*s), err
  432. }
  433. // The following implementation is provided only for symmetry with NewMapXmlReader[Raw]
  434. // The names will also provide a key for the number of return arguments.
  435. // XmlWriter Writes the MapSeq value as XML on the Writer.
  436. // See MapSeq.Xml() for encoding rules.
  437. func (mv MapSeq) XmlWriter(xmlWriter io.Writer, rootTag ...string) error {
  438. x, err := mv.Xml(rootTag...)
  439. if err != nil {
  440. return err
  441. }
  442. _, err = xmlWriter.Write(x)
  443. return err
  444. }
  445. // XmlWriteRaw writes the MapSeq value as XML on the Writer. []byte is the raw XML that was written.
  446. // See Map.XmlSeq() for encoding rules.
  447. /*
  448. func (mv MapSeq) XmlWriterRaw(xmlWriter io.Writer, rootTag ...string) ([]byte, error) {
  449. x, err := mv.Xml(rootTag...)
  450. if err != nil {
  451. return x, err
  452. }
  453. _, err = xmlWriter.Write(x)
  454. return x, err
  455. }
  456. */
  457. // XmlIndentWriter writes the MapSeq value as pretty XML on the Writer.
  458. // See MapSeq.Xml() for encoding rules.
  459. func (mv MapSeq) XmlIndentWriter(xmlWriter io.Writer, prefix, indent string, rootTag ...string) error {
  460. x, err := mv.XmlIndent(prefix, indent, rootTag...)
  461. if err != nil {
  462. return err
  463. }
  464. _, err = xmlWriter.Write(x)
  465. return err
  466. }
  467. // XmlIndentWriterRaw writes the Map as pretty XML on the Writer. []byte is the raw XML that was written.
  468. // See Map.XmlSeq() for encoding rules.
  469. /*
  470. func (mv MapSeq) XmlIndentWriterRaw(xmlWriter io.Writer, prefix, indent string, rootTag ...string) ([]byte, error) {
  471. x, err := mv.XmlSeqIndent(prefix, indent, rootTag...)
  472. if err != nil {
  473. return x, err
  474. }
  475. _, err = xmlWriter.Write(x)
  476. return x, err
  477. }
  478. */
  479. // -------------------- END: mv.Xml & mv.XmlWriter -------------------------------
  480. // ---------------------- XmlSeqIndent ----------------------------
  481. // XmlIndent encodes a map[string]interface{} as a pretty XML string.
  482. // See MapSeq.XmlSeq() for encoding rules.
  483. func (mv MapSeq) XmlIndent(prefix, indent string, rootTag ...string) ([]byte, error) {
  484. m := map[string]interface{}(mv)
  485. var err error
  486. s := new(string)
  487. p := new(pretty)
  488. p.indent = indent
  489. p.padding = prefix
  490. if len(m) == 1 && len(rootTag) == 0 {
  491. // this can extract the key for the single map element
  492. // use it if it isn't a key for a list
  493. for key, value := range m {
  494. if _, ok := value.([]interface{}); ok {
  495. err = mapToXmlSeqIndent(true, s, DefaultRootTag, m, p)
  496. } else {
  497. err = mapToXmlSeqIndent(true, s, key, value, p)
  498. }
  499. }
  500. } else if len(rootTag) == 1 {
  501. err = mapToXmlSeqIndent(true, s, rootTag[0], m, p)
  502. } else {
  503. err = mapToXmlSeqIndent(true, s, DefaultRootTag, m, p)
  504. }
  505. if xmlCheckIsValid {
  506. if _, err = NewMapXml([]byte(*s)); err != nil {
  507. return nil, err
  508. }
  509. d := xml.NewDecoder(bytes.NewReader([]byte(*s)))
  510. for {
  511. _, err = d.Token()
  512. if err == io.EOF {
  513. err = nil
  514. break
  515. } else if err != nil {
  516. return nil, err
  517. }
  518. }
  519. }
  520. return []byte(*s), err
  521. }
  522. // where the work actually happens
  523. // returns an error if an attribute is not atomic
  524. func mapToXmlSeqIndent(doIndent bool, s *string, key string, value interface{}, pp *pretty) error {
  525. var endTag bool
  526. var isSimple bool
  527. var noEndTag bool
  528. var elen int
  529. var ss string
  530. p := &pretty{pp.indent, pp.cnt, pp.padding, pp.mapDepth, pp.start}
  531. switch value.(type) {
  532. case map[string]interface{}, []byte, string, float64, bool, int, int32, int64, float32:
  533. if doIndent {
  534. *s += p.padding
  535. }
  536. if key != "#comment" && key != "#directive" && key != "#procinst" {
  537. *s += `<` + key
  538. }
  539. }
  540. switch value.(type) {
  541. case map[string]interface{}:
  542. val := value.(map[string]interface{})
  543. if key == "#comment" {
  544. *s += `<!--` + val["#text"].(string) + `-->`
  545. noEndTag = true
  546. break
  547. }
  548. if key == "#directive" {
  549. *s += `<!` + val["#text"].(string) + `>`
  550. noEndTag = true
  551. break
  552. }
  553. if key == "#procinst" {
  554. *s += `<?` + val["#target"].(string) + ` ` + val["#inst"].(string) + `?>`
  555. noEndTag = true
  556. break
  557. }
  558. haveAttrs := false
  559. // process attributes first
  560. if v, ok := val["#attr"].(map[string]interface{}); ok {
  561. // First, unroll the map[string]interface{} into a []keyval array.
  562. // Then sequence it.
  563. kv := make([]keyval, len(v))
  564. n := 0
  565. for ak, av := range v {
  566. kv[n] = keyval{ak, av}
  567. n++
  568. }
  569. sort.Sort(elemListSeq(kv))
  570. // Now encode the attributes in original decoding sequence, using keyval array.
  571. for _, a := range kv {
  572. vv := a.v.(map[string]interface{})
  573. switch vv["#text"].(type) {
  574. case string:
  575. if xmlEscapeChars {
  576. ss = escapeChars(vv["#text"].(string))
  577. } else {
  578. ss = vv["#text"].(string)
  579. }
  580. *s += ` ` + a.k + `="` + ss + `"`
  581. case float64, bool, int, int32, int64, float32:
  582. *s += ` ` + a.k + `="` + fmt.Sprintf("%v", vv["#text"]) + `"`
  583. case []byte:
  584. if xmlEscapeChars {
  585. ss = escapeChars(string(vv["#text"].([]byte)))
  586. } else {
  587. ss = string(vv["#text"].([]byte))
  588. }
  589. *s += ` ` + a.k + `="` + ss + `"`
  590. default:
  591. return fmt.Errorf("invalid attribute value for: %s", a.k)
  592. }
  593. }
  594. haveAttrs = true
  595. }
  596. // simple element?
  597. // every map value has, at least, "#seq" and, perhaps, "#text" and/or "#attr"
  598. _, seqOK := val["#seq"] // have key
  599. if v, ok := val["#text"]; ok && ((len(val) == 3 && haveAttrs) || (len(val) == 2 && !haveAttrs)) && seqOK {
  600. if stmp, ok := v.(string); ok && stmp != "" {
  601. if xmlEscapeChars {
  602. stmp = escapeChars(stmp)
  603. }
  604. *s += ">" + stmp
  605. endTag = true
  606. elen = 1
  607. }
  608. isSimple = true
  609. break
  610. } else if !ok && ((len(val) == 2 && haveAttrs) || (len(val) == 1 && !haveAttrs)) && seqOK {
  611. // here no #text but have #seq or #seq+#attr
  612. endTag = false
  613. break
  614. }
  615. // we now need to sequence everything except attributes
  616. // 'kv' will hold everything that needs to be written
  617. kv := make([]keyval, 0)
  618. for k, v := range val {
  619. if k == "#attr" { // already processed
  620. continue
  621. }
  622. if k == "#seq" { // ignore - just for sorting
  623. continue
  624. }
  625. switch v.(type) {
  626. case []interface{}:
  627. // unwind the array as separate entries
  628. for _, vv := range v.([]interface{}) {
  629. kv = append(kv, keyval{k, vv})
  630. }
  631. default:
  632. kv = append(kv, keyval{k, v})
  633. }
  634. }
  635. // close tag with possible attributes
  636. *s += ">"
  637. if doIndent {
  638. *s += "\n"
  639. }
  640. // something more complex
  641. p.mapDepth++
  642. sort.Sort(elemListSeq(kv))
  643. i := 0
  644. for _, v := range kv {
  645. switch v.v.(type) {
  646. case []interface{}:
  647. default:
  648. if i == 0 && doIndent {
  649. p.Indent()
  650. }
  651. }
  652. i++
  653. if err := mapToXmlSeqIndent(doIndent, s, v.k, v.v, p); err != nil {
  654. return err
  655. }
  656. switch v.v.(type) {
  657. case []interface{}: // handled in []interface{} case
  658. default:
  659. if doIndent {
  660. p.Outdent()
  661. }
  662. }
  663. i--
  664. }
  665. p.mapDepth--
  666. endTag = true
  667. elen = 1 // we do have some content other than attrs
  668. case []interface{}:
  669. for _, v := range value.([]interface{}) {
  670. if doIndent {
  671. p.Indent()
  672. }
  673. if err := mapToXmlSeqIndent(doIndent, s, key, v, p); err != nil {
  674. return err
  675. }
  676. if doIndent {
  677. p.Outdent()
  678. }
  679. }
  680. return nil
  681. case nil:
  682. // terminate the tag
  683. if doIndent {
  684. *s += p.padding
  685. }
  686. *s += "<" + key
  687. endTag, isSimple = true, true
  688. break
  689. default: // handle anything - even goofy stuff
  690. elen = 0
  691. switch value.(type) {
  692. case string:
  693. if xmlEscapeChars {
  694. ss = escapeChars(value.(string))
  695. } else {
  696. ss = value.(string)
  697. }
  698. elen = len(ss)
  699. if elen > 0 {
  700. *s += ">" + ss
  701. }
  702. case float64, bool, int, int32, int64, float32:
  703. v := fmt.Sprintf("%v", value)
  704. elen = len(v)
  705. if elen > 0 {
  706. *s += ">" + v
  707. }
  708. case []byte: // NOTE: byte is just an alias for uint8
  709. // similar to how xml.Marshal handles []byte structure members
  710. if xmlEscapeChars {
  711. ss = escapeChars(string(value.([]byte)))
  712. } else {
  713. ss = string(value.([]byte))
  714. }
  715. elen = len(ss)
  716. if elen > 0 {
  717. *s += ">" + ss
  718. }
  719. default:
  720. var v []byte
  721. var err error
  722. if doIndent {
  723. v, err = xml.MarshalIndent(value, p.padding, p.indent)
  724. } else {
  725. v, err = xml.Marshal(value)
  726. }
  727. if err != nil {
  728. *s += ">UNKNOWN"
  729. } else {
  730. elen = len(v)
  731. if elen > 0 {
  732. *s += string(v)
  733. }
  734. }
  735. }
  736. isSimple = true
  737. endTag = true
  738. }
  739. if endTag && !noEndTag {
  740. if doIndent {
  741. if !isSimple {
  742. *s += p.padding
  743. }
  744. }
  745. switch value.(type) {
  746. case map[string]interface{}, []byte, string, float64, bool, int, int32, int64, float32:
  747. if elen > 0 || useGoXmlEmptyElemSyntax {
  748. if elen == 0 {
  749. *s += ">"
  750. }
  751. *s += `</` + key + ">"
  752. } else {
  753. *s += `/>`
  754. }
  755. }
  756. } else if !noEndTag {
  757. if useGoXmlEmptyElemSyntax {
  758. *s += `</` + key + ">"
  759. // *s += "></" + key + ">"
  760. } else {
  761. *s += "/>"
  762. }
  763. }
  764. if doIndent {
  765. if p.cnt > p.start {
  766. *s += "\n"
  767. }
  768. p.Outdent()
  769. }
  770. return nil
  771. }
  // keyval pairs a map key with its value so that map entries can be sorted.
type keyval struct {
	k string
	v interface{}
}

// elemListSeq implements sort.Interface, ordering keyval entries by the "#seq"
// entry of their map values.
type elemListSeq []keyval

// elemNoSeq is the sort position used for entries that carry no usable "#seq".
const elemNoSeq = 9999999

// Len returns the number of entries.
func (e elemListSeq) Len() int {
	return len(e)
}

// Swap exchanges the entries at i and j.
func (e elemListSeq) Swap(i, j int) {
	e[i], e[j] = e[j], e[i]
}

// Less reports whether entry i has a strictly smaller sequence number than entry j.
// Entries with equal sequence numbers compare as equal in both directions, as
// sort.Interface requires.
func (e elemListSeq) Less(i, j int) bool {
	return elemSeqOf(e[i].v) < elemSeqOf(e[j].v)
}

// elemSeqOf extracts the "#seq" value of v as an int. An int is used as is and a
// float64 is truncated to int; a value that is not a map[string]interface{}, or
// whose "#seq" is missing or of another type, yields elemNoSeq.
func elemSeqOf(v interface{}) int {
	m, ok := v.(map[string]interface{})
	if !ok {
		return elemNoSeq
	}
	switch seq := m["#seq"].(type) {
	case int:
		return seq
	case float64:
		return int(seq)
	}
	return elemNoSeq
}
  804. // =============== https://groups.google.com/forum/#!topic/golang-nuts/lHPOHD-8qio
  805. // BeautifyXml (re)formats an XML doc similar to Map.XmlIndent().
  806. // It preserves comments, directives and process instructions,
  807. func BeautifyXml(b []byte, prefix, indent string) ([]byte, error) {
  808. x, err := NewMapXmlSeq(b)
  809. if err != nil {
  810. return nil, err
  811. }
  812. return x.XmlIndent(prefix, indent)
  813. }