ISPConfig module that simplifies the creation of websites and DNS zones in a single step
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

365 line
11 KiB

  1. <?php
  2. /*
  3. * Copyright (c) 2003 Jose Solorzano. All rights reserved.
  4. * Redistribution of source must retain this copyright notice.
  5. *
  6. * Jose Solorzano (http://jexpert.us) is a software consultant.
  7. *
  8. * Contributions by:
  9. * - Leo West (performance improvements)
  10. */
  // Node type codes stored in HtmlParser::$iNodeType.
  // NODE_TYPE_START is declared for completeness; the parser code in this
  // file never assigns it.
  define("NODE_TYPE_START", 0);
  // An opening (or self-closing) tag such as <a href="...">.
  define("NODE_TYPE_ELEMENT", 1);
  // A closing tag such as </a>.
  define("NODE_TYPE_ENDELEMENT", 2);
  // Character data between tags, or markup that could not be read as a tag.
  define("NODE_TYPE_TEXT", 3);
  // An HTML comment (<!-- ... -->).
  define("NODE_TYPE_COMMENT", 4);
  // End of input reached; set when parse() returns false.
  define("NODE_TYPE_DONE", 5);
  /**
   * Class HtmlParser.
   *
   * A small pull-style HTML tokenizer. Create an instance with the HTML
   * text, then call parse() repeatedly until it returns false. After each
   * call that returns true, $iNodeType, $iNodeName and $iNodeValue describe
   * the node that was just read; $iNodeAttributes is refreshed whenever a
   * tag is read.
   *
   * The convenience functions HtmlParser_ForFile and HtmlParser_ForURL
   * build an instance from a file or URL.
   */
  class HtmlParser {
      /**
       * Field iNodeType.
       * One of the NODE_TYPE_* constants; null until parse() is first called.
       */
      public $iNodeType;

      /**
       * Field iNodeName.
       * Tag name for element / end-element nodes, "Text" for text nodes and
       * "Comment" for comment nodes.
       */
      public $iNodeName = "";

      /**
       * Field iNodeValue.
       * The text of a text node, the raw markup of a comment node, and ""
       * for element and end-element nodes.
       */
      public $iNodeValue = "";

      /**
       * Field iNodeAttributes.
       * A string-indexed array containing attribute values of the most
       * recently read tag. Indexes are always lowercase. Attributes without
       * a value map to "".
       */
      public $iNodeAttributes;

      /**
       * Field iCurrentChar.
       * The character at the current read position, or the integer -1 once
       * the end of the text has been reached. Declared public because the
       * methods below always used it under this name (it used to be created
       * implicitly as a dynamic, hence public, property).
       */
      public $iCurrentChar;

      // The following fields are private:
      private $iHtmlText;
      private $iHtmlTextLength;
      private $iHtmlTextIndex = 0;
      private $BOE_ARRAY;
      private $B_ARRAY;
      private $BOS_ARRAY;

      /**
       * Constructor.
       * Constructs an HtmlParser instance with the HTML text given and
       * positions it on the first character.
       *
       * @param string $aHtmlText HTML text to tokenize.
       */
      public function __construct ($aHtmlText) {
          // Cast so that a null/false argument behaves like empty input
          // instead of reaching strlen() as a non-string.
          $this->iHtmlText = (string) $aHtmlText;
          $this->iHtmlTextLength = strlen($this->iHtmlText);
          $this->iNodeAttributes = array();
          $this->setTextIndex(0);
          // Delimiter sets: Blanks-Or-Equals, Blanks, Blanks-Or-Slash.
          $this->BOE_ARRAY = array(" ", "\t", "\r", "\n", "=");
          $this->B_ARRAY = array(" ", "\t", "\r", "\n");
          $this->BOS_ARRAY = array(" ", "\t", "\r", "\n", "/");
      }

      /**
       * Method parse.
       * Parses the next node. Returns false only if the end of the HTML
       * text has been reached. Updates values of iNode* fields.
       *
       * @return bool true when a node was read, false at end of input.
       */
      public function parse() {
          $text = $this->skipToElement();
          if ($text !== "") {
              $this->iNodeType = NODE_TYPE_TEXT;
              $this->iNodeName = "Text";
              $this->iNodeValue = $text;
              return true;
          }
          return $this->readTag();
      }

      /**
       * Resets iNodeAttributes to an empty array.
       */
      public function clearAttributes() {
          $this->iNodeAttributes = array();
      }

      /**
       * Reads the tag, comment or stray "<" starting at the current
       * position and updates the iNode* fields accordingly.
       *
       * @return bool false (with iNodeType = NODE_TYPE_DONE) when the current
       *              character is not "<", true otherwise.
       */
      public function readTag() {
          if ($this->iCurrentChar !== "<") {
              $this->iNodeType = NODE_TYPE_DONE;
              return false;
          }
          $this->clearAttributes();
          $this->skipMaxInTag("<", 1);

          // Closing tag: </name ...>
          if ($this->iCurrentChar === "/") {
              $this->moveNext();
              $name = $this->skipToBlanksInTag();
              $this->iNodeType = NODE_TYPE_ENDELEMENT;
              $this->iNodeName = $name;
              $this->iNodeValue = "";
              $this->skipEndOfTag();
              return true;
          }

          $name = $this->skipToBlanksOrSlashInTag();

          if ($this->isValidTagIdentifier($name)) {
              $this->iNodeType = NODE_TYPE_ELEMENT;
              $this->iNodeValue = "";
              $this->iNodeName = $name;
              // Each attribute must be preceded by at least one blank.
              while ($this->skipBlanksInTag()) {
                  $attrName = $this->skipToBlanksOrEqualsInTag();
                  if ($attrName === "" || $attrName === "/") {
                      continue;
                  }
                  $key = strtolower($attrName);
                  $this->skipBlanksInTag();
                  if ($this->iCurrentChar === "=") {
                      $this->skipEqualsInTag();
                      $this->skipBlanksInTag();
                      $this->iNodeAttributes[$key] = $this->readValueInTag();
                  } else {
                      // Attribute given without a value, e.g. <input disabled>.
                      $this->iNodeAttributes[$key] = "";
                  }
              }
              $this->skipEndOfTag();
              return true;
          }

          if (strpos($name, "!--") === 0) {
              // The whole comment was read as the "name" when it contains no
              // blank or slash, e.g. <!--x-->. It is complete when it ends
              // with "--" that does not overlap the opening "!--". Checking
              // the end (not the first "--") keeps <!--a--b--> a comment.
              if (strlen($name) >= 5 && substr($name, -2) === "--") {
                  $this->iNodeType = NODE_TYPE_COMMENT;
                  $this->iNodeName = "Comment";
                  $this->iNodeValue = "<" . $name . ">";
                  $this->skipEndOfTag();
                  return true;
              }
              $rest = $this->skipToStringInTag("-->");
              if ($rest !== "") {
                  $this->iNodeType = NODE_TYPE_COMMENT;
                  $this->iNodeName = "Comment";
                  $this->iNodeValue = "<" . $name . $rest;
                  // skipToStringInTag already moved past the closing "-->".
                  return true;
              }
          }

          // Neither a tag nor a terminated comment: hand the consumed
          // characters back as text and leave the position where it is.
          $this->iNodeType = NODE_TYPE_TEXT;
          $this->iNodeName = "Text";
          $this->iNodeValue = "<" . $name;
          return true;
      }

      /**
       * Tells whether $name consists only of letters, digits, "_" and "-".
       *
       * @param string $name Candidate tag name.
       * @return int|false Result of preg_match(): 1 on match, 0 otherwise.
       */
      public function isValidTagIdentifier ($name) {
          return preg_match("/^[A-Za-z0-9_\\-]+$/", $name);
      }

      /**
       * Skips blanks at the current position.
       *
       * @return bool true if at least one blank was skipped.
       */
      public function skipBlanksInTag() {
          return $this->skipInTag($this->B_ARRAY) !== "";
      }

      /**
       * Reads up to (not including) the next blank, "=" or ">".
       *
       * @return string The characters read.
       */
      public function skipToBlanksOrEqualsInTag() {
          return $this->skipToInTag($this->BOE_ARRAY);
      }

      /**
       * Reads up to (not including) the next blank or ">".
       *
       * @return string The characters read.
       */
      public function skipToBlanksInTag() {
          return $this->skipToInTag($this->B_ARRAY);
      }

      /**
       * Reads up to (not including) the next blank, "/" or ">".
       *
       * @return string The characters read.
       */
      public function skipToBlanksOrSlashInTag() {
          return $this->skipToInTag($this->BOS_ARRAY);
      }

      /**
       * Skips a single "=" at the current position, if present.
       *
       * @return string "=" if one was skipped, "" otherwise.
       */
      public function skipEqualsInTag() {
          return $this->skipMaxInTag("=", 1);
      }

      /**
       * Reads an attribute value at the current position. Double- and
       * single-quoted values are read up to the matching quote; unquoted
       * values are read up to the next blank. Reading always stops at ">".
       *
       * @return string The value without surrounding quotes.
       */
      public function readValueInTag() {
          $quote = $this->iCurrentChar;
          if ($quote === "\"" || $quote === "'") {
              $this->skipMaxInTag($quote, 1);
              $value = $this->skipToInTag($quote);
              $this->skipMaxInTag($quote, 1);
              return $value;
          }
          return $this->skipToBlanksInTag();
      }

      /**
       * Moves the read position to $index and refreshes iCurrentChar,
       * which becomes -1 when $index is at or past the end of the text.
       *
       * @param int $index New zero-based position.
       */
      public function setTextIndex ($index) {
          $this->iHtmlTextIndex = $index;
          if ($index >= $this->iHtmlTextLength) {
              $this->iCurrentChar = -1;
          } else {
              $this->iCurrentChar = $this->iHtmlText[$index];
          }
      }

      /**
       * Advances the read position by one character.
       *
       * @return bool false if the position was already at the end.
       */
      public function moveNext() {
          if ($this->iHtmlTextIndex >= $this->iHtmlTextLength) {
              return false;
          }
          $this->setTextIndex($this->iHtmlTextIndex + 1);
          return true;
      }

      /**
       * Advances just past the next ">", or to the end of the text when
       * there is none.
       */
      public function skipEndOfTag() {
          while (($ch = $this->iCurrentChar) !== -1) {
              $this->moveNext();
              if ($ch === ">") {
                  return;
              }
          }
      }

      /**
       * Consumes characters while they belong to $chars. Stops at ">",
       * at the first character not in $chars, or at the end of the text.
       *
       * @param array|string $chars Characters to skip, as an array of
       *                            one-character strings or as a string.
       * @return string The characters consumed.
       */
      public function skipInTag ($chars) {
          $set = $this->charList($chars);
          $sb = "";
          while (($ch = $this->iCurrentChar) !== -1) {
              if ($ch === ">" || !in_array($ch, $set, true)) {
                  break;
              }
              $sb .= $ch;
              $this->moveNext();
          }
          return $sb;
      }

      /**
       * Like skipInTag(), but consumes at most $maxChars characters.
       *
       * @param array|string $chars    Characters to skip.
       * @param int          $maxChars Upper bound on characters consumed.
       * @return string The characters consumed.
       */
      public function skipMaxInTag ($chars, $maxChars) {
          $set = $this->charList($chars);
          $sb = "";
          for ($count = 0; $count < $maxChars; $count++) {
              $ch = $this->iCurrentChar;
              if ($ch === -1 || $ch === ">" || !in_array($ch, $set, true)) {
                  break;
              }
              $sb .= $ch;
              $this->moveNext();
          }
          return $sb;
      }

      /**
       * Consumes characters until one that belongs to $chars, a ">", or
       * the end of the text is reached. The stopping character is not
       * consumed.
       *
       * @param array|string $chars Characters that end the scan.
       * @return string The characters consumed.
       */
      public function skipToInTag ($chars) {
          $set = $this->charList($chars);
          $sb = "";
          while (($ch = $this->iCurrentChar) !== -1) {
              if ($ch === ">" || in_array($ch, $set, true)) {
                  break;
              }
              $sb .= $ch;
              $this->moveNext();
          }
          return $sb;
      }

      /**
       * Consumes and returns everything up to (not including) the next
       * "<", or up to the end of the text when there is none.
       *
       * @return string The text consumed; "" if already at "<" or at the end.
       */
      public function skipToElement() {
          $start = $this->iHtmlTextIndex;
          if ($start >= $this->iHtmlTextLength) {
              return "";
          }
          $pos = strpos($this->iHtmlText, "<", $start);
          if ($pos === false) {
              $pos = $this->iHtmlTextLength;
          }
          $this->setTextIndex($pos);
          return substr($this->iHtmlText, $start, $pos - $start);
      }

      /**
       * Returns text between current position and $needle,
       * inclusive, or "" if not found. The current index is moved to a point
       * after the location of $needle, or not moved at all
       * if nothing is found.
       *
       * @param string $needle String to search for.
       * @return string Text up to and including $needle, or "".
       */
      public function skipToStringInTag ($needle) {
          $from = $this->iHtmlTextIndex;
          $pos = strpos($this->iHtmlText, $needle, $from);
          if ($pos === false) {
              return "";
          }
          $top = $pos + strlen($needle);
          $this->setTextIndex($top);
          return substr($this->iHtmlText, $from, $top - $from);
      }

      /**
       * Normalizes a character set given either as an array of characters
       * or as a string (each byte is one member) into an array, so that the
       * skip* helpers accept both forms.
       */
      private function charList ($chars) {
          if (is_array($chars)) {
              return $chars;
          }
          $chars = (string) $chars;
          return $chars === "" ? array() : str_split($chars);
      }
  }
  /**
   * Creates an HtmlParser for the contents of a file.
   *
   * Delegates to HtmlParser_ForURL, which opens its argument with fopen().
   *
   * @param string $fileName Path of the file to read.
   * @return HtmlParser Whatever HtmlParser_ForURL returns for that path.
   */
  function HtmlParser_ForFile ($fileName) {
      return HtmlParser_ForURL($fileName);
  }
  /**
   * Creates an HtmlParser for the contents found at a URL (or any other
   * path that fopen() accepts).
   *
   * If the resource cannot be opened or read, fopen() emits its usual
   * warning and a parser over empty text is returned, so the first call
   * to parse() on it returns false.
   *
   * @param string $url URL or path to read.
   * @return HtmlParser Parser positioned at the start of the content.
   */
  function HtmlParser_ForURL ($url) {
      $content = "";
      $fp = fopen($url, "r");
      // Only touch the handle when the open succeeded; fread()/fclose() on
      // false is a TypeError on PHP 8.
      if ($fp !== false) {
          $data = stream_get_contents($fp);
          fclose($fp);
          if ($data !== false) {
              $content = $data;
          }
      }
      return new HtmlParser($content);
  }
  332. ?>