This is an XML parser written in PHP for use in IDE tools. It is significantly slower than existing XML parsers, but the benefit is that you get an Abstract Syntax Tree as output. There are also some small adjustments that allow for slight error tolerance.
The parser implementation is heavily inspired by the Tolerant PHP Parser by Microsoft.
Via Composer
composer require raideer/xml-parser
<?php
// Load the Composer-generated autoloader (only needed if it has not been loaded yet).
require __DIR__ . '/vendor/autoload.php';

use Raideer\XmlParser\Parser;

// Parse a small XML snippet; the parser hands back the resulting document.
$parser = new Parser();
$document = $parser->parse('<xml>Hello world!</xml>');

// Callback that prints whatever it is given as one JSON-encoded line.
$printAsJson = function ($nodeOrToken) {
    echo json_encode($nodeOrToken), PHP_EOL;
};

// Hand the callback to the document's walker so it runs for each node/token it visits.
$document->walkDescendantNodesAndTokens($printAsJson);
// This will be the output for the first node (pretty-printed here for readability;
// json_encode() without flags emits it on a single line)
// {
// "type": "element",
// "children": {
// "3": {
// "type": "content",
// "children": [
// {
// "type": "charData",
// "children": [],
// "tokens": [
// {
// "kind": "TEXT",
// "value": "Hello world!",
// "offset": 5
// }
// ]
// }
// ],
// "tokens": []
// }
// },
// "tokens": {
// "0": {
// "kind": "OPEN",
// "value": "<",
// "offset": 0
// },
// "1": {
// "kind": "NAME",
// "value": "xml",
// "offset": 1
// },
// "2": {
// "kind": "CLOSE",
// "value": ">",
// "offset": 4
// },
// "4": {
// "kind": "OPEN",
// "value": "<",
// "offset": 17
// },
// "5": {
// "kind": "SLASH",
// "value": "/",
// "offset": 18
// },
// "6": {
// "kind": "NAME",
// "value": "xml",
// "offset": 19
// },
// "7": {
// "kind": "CLOSE",
// "value": ">",
// "offset": 22
// }
// }
// }