<?php
 
 
/**
 * Transform the awful HTML of Flash into standardized HTML.
 *
 * The input is wrapped in a <root> element and fed to PHP's event based XML
 * parser (with case folding, so tag and attribute names arrive upper-cased).
 * Every registered tag is translated either by a plain tag-to-tag mapping or
 * by a start/end handler callback; unregistered tags are dropped while their
 * text content is kept.
 *
 * Non-ASCII characters never travel through the parser as raw bytes: they are
 * masked with a unique ASCII marker first and finally emitted as numeric
 * character references (&#x....;), so the result is always plain ASCII.
 */
class Flash2HTML {

    /** Active XML parser; only set while html() is running. */
    private $parser;

    /** Stack of the entities whose start tag has been seen but not yet closed. */
    private $nodes = array();

    /** Registered tags, keyed by the upper-case source tag name. */
    private $entities = array();

    /** Output accumulated by the parser callbacks during one html() call. */
    private $content = NULL;

    /** Optional user callback that replaces the default text handling. */
    private $dataHandler = NULL;

    /** When TRUE, html() drops all markup and returns text with "\n" after each paragraph. */
    public $plainText = FALSE;

    /** When TRUE, every character of a mailto: address is written as a hex character reference. */
    public $protectEmail = FALSE;

    /**
     * Registers the default tag mappings and handlers.
     *
     * A PHP 4 style constructor (a method named like the class) is no longer
     * treated as a constructor by PHP 8, which would leave the converter
     * without any registered tag.
     */
    public function __construct() {
        $this->setTagTransformation('B', 'strong');
        $this->setTagTransformation('I', 'em');
        $this->setTagTransformation('LI', 'li');

        $this->setStartTagHandler('A', array($this, "a_startTagHandler"));
        $this->setStartTagHandler('FONT', array($this, "font_startTagHandler"));
        $this->setStartTagHandler('IMG', array($this, "img_startTagHandler"));
        $this->setStartTagHandler("P", array($this, "p_startTagHandler"));
        $this->setEndTagHandler("P", array($this, "p_endTagHandler"));
        $this->setStartTagHandler('TEXTFORMAT', array($this, "textformat_startTagHandler"));
        $this->setStartTagHandler('U', array($this, "u_startTagHandler"));
    }

    /**
     * Legacy alias of the constructor, kept so that code calling it explicitly
     * (e.g. parent::Flash2HTML()) keeps working. It re-registers the defaults.
     *
     * @deprecated use the regular constructor
     */
    public function Flash2HTML() {
        $this->__construct();
    }

    /**
     * Shortcut for html(), so the converter can be used as a callable.
     *
     * @param String $data
     * @param Array $properties
     * @return String
     */
    public function __invoke($data, $properties = array()) {
        return $this->html($data, $properties);
    }

    /**
     * Unregisters a tag: its markup is dropped, its text content is kept.
     *
     * @param String $tag upper-case source tag name
     */
    public function ignoreTag($tag) {
        unset($this->entities[$tag]);
    }

    /**
     * Maps a source tag to an output tag and clears any handler registered
     * for it.
     *
     * $to_tag is either a bare tag name ("strong") or a full opening tag with
     * optional attributes and optional self-closing slash
     * ('<span class="x">', '<br />', '<br/>'). Anything else is ignored.
     *
     * @param String $from_tag upper-case source tag name
     * @param String $to_tag
     */
    public function setTagTransformation($from_tag, $to_tag) {
        if (preg_match('/^\w+$/', $to_tag, $matches)) {
            $this->defineEntity($from_tag, $matches[0], NULL, FALSE);
        } elseif (preg_match('/^<(\w+)(?:\s+(.*?))?\s*(\/?)>$/s', $to_tag, $matches)) {
            $this->defineEntity($from_tag, $matches[1], $matches[2], $matches[3] === "/");
        }
    }

    /**
     * Registers a callback producing the opening markup of a tag.
     *
     * The callback receives ($parser, $tag, $attrs) and returns the markup to
     * emit. If that markup starts with a tag, its name is used for the
     * automatic closing tag; if it contains "/>" no closing tag is written;
     * if it is empty, nothing is opened and nothing is closed automatically.
     *
     * @param String $tag upper-case source tag name
     * @param Function $start_tag_handler
     */
    public function setStartTagHandler($tag, $start_tag_handler) {
        $this->ensureEntity($tag);
        $this->entities[$tag]["start_tag_handler"] = $start_tag_handler;
    }

    /**
     * Registers a callback that replaces the default handling of text nodes
     * in HTML mode. It receives ($parser, $cdata) with the raw, unescaped
     * text and its return value is written to the output as is.
     *
     * @param Function $data_handler
     */
    public function setDataHandler($data_handler) {
        $this->dataHandler = $data_handler;
    }

    /**
     * Registers a callback producing the closing markup of a tag. It receives
     * ($parser, $tag) and its return value replaces the automatic closing tag.
     *
     * @param String $tag upper-case source tag name
     * @param Function $end_tag_handler
     */
    public function setEndTagHandler($tag, $end_tag_handler) {
        $this->ensureEntity($tag);
        $this->entities[$tag]["end_tag_handler"] = $end_tag_handler;
    }

    /**
     * Removes the start handler of a registered tag; unknown tags are left
     * alone so that no half-initialised entry is created.
     *
     * @param String $tag
     */
    public function removeStartTagHandler($tag) {
        if (isset($this->entities[$tag])) {
            $this->entities[$tag]["start_tag_handler"] = NULL;
        }
    }

    /**
     * Clears the per-tag "data_tag_handler" slot of a registered tag.
     * NOTE(review): nothing in this class reads that slot; the handler set
     * with setDataHandler() is global and is not affected by this method.
     *
     * @param String $tag
     */
    public function removeDataTagHandler($tag) {
        if (isset($this->entities[$tag])) {
            $this->entities[$tag]["data_tag_handler"] = NULL;
        }
    }

    /**
     * Removes the end handler of a registered tag; unknown tags are left alone.
     *
     * @param String $tag
     */
    public function removeEndTagHandler($tag) {
        if (isset($this->entities[$tag])) {
            $this->entities[$tag]["end_tag_handler"] = NULL;
        }
    }

    /**
     * Converts Flash markup.
     *
     * @param String $data markup to convert, expected to be valid UTF-8
     * @param Array $properties property name => value overrides (typically
     *        plainText / protectEmail) applied for this call only; names that
     *        are not declared properties are ignored
     * @return String ASCII-only HTML, or plain text when plainText is set
     */
    public function html($data, $properties = array()) {
        $marker = uniqid("unicode_");
        $data = $this->maskNonAscii((string) $data, $marker);

        $original_properties = array();
        foreach ($properties as $key => $value) {
            if (!property_exists($this, $key)) {
                continue;
            }
            $original_properties[$key] = $this->$key;
            $this->$key = $value;
        }

        // Start from a clean state even if a previous run stopped in the
        // middle of malformed input.
        $this->nodes = array();
        $this->content = NULL;

        $xml = $this->prepare($data);

        $this->parser = xml_parser_create();
        xml_parser_set_option($this->parser, XML_OPTION_CASE_FOLDING, TRUE);
        xml_set_element_handler($this->parser, array($this, "startTagHandler"), array($this, "endTagHandler"));
        xml_set_character_data_handler($this->parser, array($this, "dataHandler"));
        // Best effort: on malformed input the output produced up to the
        // error is kept and the error itself is not reported.
        xml_parse($this->parser, $xml, TRUE);
        if (PHP_VERSION_ID < 80000) {
            // Parsers are objects from PHP 8 on and are released automatically.
            xml_parser_free($this->parser);
        }
        $this->parser = NULL;

        $ret = (string) $this->content;
        $this->content = NULL;
        $this->nodes = array();

        // Characters the parser decoded from numeric references in the input
        // are masked as well, so they end up as references again instead of
        // being squeezed lossily into a single-byte charset.
        $ret = $this->maskNonAscii($ret, $marker);

        if ($this->protectEmail) {
            $ret = preg_replace_callback("/mailto:(.*)\"/U", array($this, "protectEmailCallback"), $ret);
        }

        // restore original properties
        foreach ($original_properties as $key => $value) {
            $this->$key = $value;
        }

        // Turn the markers into character references; a surrogate pair is
        // merged into the single code point it stands for.
        $quoted = preg_quote($marker, '/');
        $ret = preg_replace_callback(
            '/' . $quoted . '(d[89ab][0-9a-f]{2})' . $quoted . '(d[c-f][0-9a-f]{2})/',
            array($this, "surrogatePairCallback"),
            $ret
        );
        $ret = preg_replace('/' . $quoted . '([0-9a-z]{4})/', '&#x$1;', $ret);

        // Drop the line break after the last paragraph.
        $ret = preg_replace(array("/<br \/>\n$/", "/<br \/>\n<\/div>$/"), array("", "</div>"), $ret);
        // Drop empty spans, and the underline span Flash puts inside links.
        $ret = preg_replace("/<span[^>]*><\/span>/U", "", $ret);
        $ret = preg_replace("/<a([^>]*)><span style=\"text-decoration: underline; \">(.*)<\/span><\/a>/U", "<a\$1>\$2</a>", $ret);
        // Wrap list items in <ul>. The match is greedy on purpose so that
        // consecutive items on one line share a single list.
        $ret = preg_replace("/<li>(.*)<\/li>/", "<ul><li>$1</li>\n</ul>\n", $ret);
        $ret = preg_replace("/<li>/", "\n\t<li>", $ret);
        $ret = preg_replace("/<br \/>\n<ul>/", "<ul>", $ret);
        $ret = preg_replace("/<\/ul>\n<br \/>/", "</ul>", $ret);

        return $ret;
    }

    /**
     * Makes the markup parseable as XML and wraps it in a root element.
     * In HTML mode HREF values get their special characters escaped (values
     * that are already escaped are left alone) and IMG tags are self-closed.
     */
    private function prepare($data) {
        if (!$this->plainText) {
            $data = preg_replace_callback("/HREF=\"(.*)\"/U", array($this, "fixHREFCallback"), $data);
            $data = preg_replace("/<IMG\b([^>]*?)\s*\/?>/", "<IMG$1 />", $data);
        }
        return "<root>$data</root>";
    }

    /**
     * Parser callback for an opening tag: writes the opening markup of a
     * registered tag and pushes its entity for the matching end tag.
     */
    private function startTagHandler($parser, $tag, $attrs) {
        if ($this->plainText || !isset($this->entities[$tag])) {
            return;
        }

        $entity = $this->entities[$tag];
        if (!empty($entity["start_tag_handler"])) {
            $str = (string) call_user_func($entity["start_tag_handler"], $parser, $tag, $attrs);
            if ($str === '') {
                // Nothing was opened, so there is nothing to close by default.
                $entity["tag"] = NULL;
            } elseif (preg_match("/^<(\w+)/", $str, $matches)) {
                $entity["tag"] = $matches[1];
                $entity["closed"] = strpos($str, "/>") !== FALSE;
            }
            $this->content .= $str;
        } else {
            $this->content .= "<$entity[tag]";
            if ($entity["attributes"] !== NULL && $entity["attributes"] !== '') {
                $this->content .= " $entity[attributes]";
            }
            $this->content .= $entity["closed"] ? " />" : ">";
        }

        $this->nodes[] = $entity;
    }

    /**
     * Parser callback for text. Plain text mode copies it verbatim. HTML mode
     * hands the raw text to the user data handler if there is one; otherwise
     * it re-escapes the characters the parser decoded and keeps runs of
     * whitespace visible as non-breaking spaces.
     */
    private function dataHandler($parser, $cdata) {
        if ($this->plainText) {
            $this->content .= $cdata;
            return;
        }

        if (!empty($this->dataHandler)) {
            $this->content .= call_user_func($this->dataHandler, $parser, $cdata);
            return;
        }

        $str = str_replace(array('&', '<', '>'), array('&amp;', '&lt;', '&gt;'), $cdata);
        $this->content .= preg_replace_callback("/\s{2,}/", array($this, "replaceSpacesCallback"), $str);
    }

    /**
     * Parser callback for a closing tag: writes the closing markup of the
     * entity pushed by the matching start tag.
     */
    private function endTagHandler($parser, $tag) {
        if (!isset($this->entities[$tag])) {
            return;
        }

        if ($this->plainText) {
            if ($tag == "P") {
                $this->content .= "\n";
            }
            return;
        }

        $entity = array_pop($this->nodes);
        if ($entity === NULL || $entity["closed"]) {
            return;
        }
        if (!empty($entity["end_tag_handler"])) {
            $this->content .= call_user_func($entity["end_tag_handler"], $parser, $tag);
        } elseif ($entity["tag"] !== NULL) {
            $this->content .= "</$entity[tag]>";
        }
    }

    /** Paragraphs open nothing; they only end with a line break. */
    private function p_startTagHandler($parser, $tag, $attrs) {
        return NULL;
    }

    /** A paragraph ends with a line break. */
    private function p_endTagHandler($parser, $tag) {
        return "<br />\n";
    }

    /** Builds the <a> tag from HREF and, when present and non-empty, TARGET. */
    private function a_startTagHandler($parser, $tag, $attrs) {
        $href = array_key_exists("HREF", $attrs) ? $attrs["HREF"] : '';
        $ret = '<a href="' . $this->escapeAttribute($href) . '"';
        if (array_key_exists("TARGET", $attrs) && strlen($attrs["TARGET"]) > 0) {
            $ret .= ' target="' . $this->escapeAttribute($attrs["TARGET"]) . '"';
        }
        return $ret . '>';
    }

    /** Turns the FONT attributes into an inline styled <span>. */
    private function font_startTagHandler($parser, $tag, $attrs) {
        $ret = '<span style="';
        if (array_key_exists("FACE", $attrs)) {
            $ret .= "font-family: '" . $this->escapeAttribute($attrs["FACE"]) . "'; ";
        }
        if (array_key_exists("SIZE", $attrs)) {
            $ret .= "font-size: " . $this->escapeAttribute($attrs["SIZE"]) . "px; ";
        }
        if (array_key_exists("COLOR", $attrs)) {
            $ret .= "color: " . $this->escapeAttribute($attrs["COLOR"]) . "; ";
        }
        if (array_key_exists("LEADING", $attrs)) {
            $ret .= "line-height: " . $this->escapeAttribute($attrs["LEADING"]) . "px; ";
        }
        if (array_key_exists("LETTERSPACING", $attrs)) {
            $ret .= "letter-spacing: " . $this->escapeAttribute($attrs["LETTERSPACING"]) . "px; ";
        }
        return $ret . '">';
    }

    /** Builds a floated, self-closed <img>; floats left unless ALIGN is "right". */
    private function img_startTagHandler($parser, $tag, $attrs) {
        $align = array_key_exists("ALIGN", $attrs) ? $attrs["ALIGN"] : NULL;
        $style = $align == "right" ? "float: right; margin-left: 10px; " : "float: left; margin-right: 10px; ";
        $src = array_key_exists("SRC", $attrs) ? $this->escapeAttribute($attrs["SRC"]) : '';
        return "<img style=\"$style\" src=\"$src\" alt=\"\" />";
    }

    /**
     * Opens a <div> carrying BLOCKINDENT / LEADING as margins. Returns NULL,
     * i.e. opens nothing, when neither attribute is present.
     */
    private function textformat_startTagHandler($parser, $tag, $attrs) {
        $has_indent = array_key_exists("BLOCKINDENT", $attrs);
        $has_leading = array_key_exists("LEADING", $attrs);
        if (!$has_indent && !$has_leading) {
            return NULL;
        }

        $ret = '<div style="';
        if ($has_indent) {
            $ret .= "margin-left: " . $this->escapeAttribute($attrs["BLOCKINDENT"]) . "px; ";
        }
        if ($has_leading) {
            $ret .= "margin-bottom: " . $this->escapeAttribute($attrs["LEADING"]) . "px; ";
        }
        return $ret . '">';
    }

    /** Underlined text becomes a styled <span>. */
    private function u_startTagHandler($parser, $tag, $attrs) {
        return '<span style="text-decoration: underline; ">';
    }

    /**
     * Replaces a run of whitespace by as many non-breaking spaces, so that
     * the spacing survives HTML whitespace collapsing.
     */
    private function replaceSpacesCallback($matches) {
        return str_repeat("&nbsp;", strlen($matches[0]));
    }

    /**
     * Writes every byte of a mailto: address as a hex character reference.
     * NOTE(review): the address is encoded byte by byte, so an address that
     * contains masked non-ASCII characters is not restored afterwards.
     */
    private function protectEmailCallback($matches) {
        $address = $matches[1];
        $encoded = '';
        $length = strlen($address);
        for ($i = 0; $i < $length; $i++) {
            $encoded .= sprintf("&#x%X;", ord($address[$i]));
        }
        return "mailto:$encoded\"";
    }

    /**
     * Escapes an HREF value so that the XML parser accepts it. Entities that
     * are already present are not encoded a second time.
     */
    private function fixHREFCallback($matches) {
        return 'HREF="' . htmlspecialchars($matches[1], ENT_COMPAT, 'UTF-8', FALSE) . '"';
    }

    /** Stores a complete entity definition without any handler. */
    private function defineEntity($from_tag, $tag, $attributes, $closed) {
        $this->entities[$from_tag] = array(
            "tag" => $tag,
            "attributes" => $attributes,
            "closed" => $closed,
            "start_tag_handler" => NULL,
            "data_tag_handler" => NULL,
            "end_tag_handler" => NULL,
        );
    }

    /** Registers $tag with its lower-cased name unless it is already known. */
    private function ensureEntity($tag) {
        if (!array_key_exists($tag, $this->entities)) {
            $this->defineEntity($tag, strtolower($tag), NULL, FALSE);
        }
    }

    /** Escapes a value for use inside a double-quoted HTML attribute. */
    private function escapeAttribute($value) {
        return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
    }

    /**
     * Replaces every character that json_encode() writes as a \uXXXX escape
     * (all non-ASCII characters and some control characters) by
     * $marker . XXXX, using JSON as a portable UTF-8 to code unit converter.
     *
     * Only real escapes are rewritten: a literal backslash is encoded as "\\"
     * in JSON, so a "\u" preceded by an odd number of backslashes is text
     * (e.g. "C:\users") and must stay untouched, otherwise the JSON becomes
     * undecodable and the whole string would be lost.
     *
     * A string that cannot be encoded (invalid UTF-8) is returned unchanged.
     */
    private function maskNonAscii($str, $marker) {
        $json = json_encode($str);
        if (!is_string($json)) {
            return $str;
        }
        $json = preg_replace(
            '/(?<!\\\\)((?:\\\\\\\\)*)\\\\u([0-9a-f]{4})/',
            '${1}' . $marker . '${2}',
            $json
        );
        $masked = json_decode($json);
        return is_string($masked) ? $masked : $str;
    }

    /**
     * Merges a masked UTF-16 surrogate pair (as produced by json_encode() for
     * characters above U+FFFF) into one character reference.
     */
    private function surrogatePairCallback($matches) {
        $high = hexdec($matches[1]);
        $low = hexdec($matches[2]);
        $code = 0x10000 + (($high - 0xD800) << 10) + ($low - 0xDC00);
        return sprintf('&#x%x;', $code);
    }

}
 
 
?>