# Helpers for turning free-form user text into a small whitelisted subset of
# HTML: disallowed tags are stripped and tags left open are closed.
module HTMLTextUtils
  # Tags that take no closing tag; they are never tracked as "open".
  SOLO_TAGS = %w[br hr].freeze

  # A double-quoted attribute assignment such as href="http://example.org".
  ATTRIBUTE_PATTERN = /(\w+)=("[^"]+")/.freeze

  # Normalizes the line breaks of +input+, turns each remaining newline into
  # a <br /> tag and passes the result through +sanitize+ with its default
  # whitelist.
  #
  # @param input [String, nil] raw user text
  # @return [String] sanitized HTML; '' when +input+ is nil or only whitespace
  def self.format_text(input)
    return '' if input.nil?

    text = input.strip
    # An empty string would match the blank-line pattern below and come back
    # as a lone line break, so bail out early.
    return '' if text.empty?

    # Normalize CRLF line endings to LF.
    text = text.gsub("\r\n", "\n")
    # An empty line plus any whitespace that follows it becomes one newline.
    text = text.gsub(/^$\s*/m, "\n")
    # Never keep more than one blank line in a row.
    text = text.gsub(/\n{3,}/, "\n\n")
    # nl2br: every remaining newline becomes an explicit line break tag.
    text = text.gsub("\n", '<br />')

    sanitize(text)
  end

  # Removes every tag that is not whitelisted in +okTags+, drops attributes
  # that are not whitelisted for their tag, and appends closing tags for
  # whitelisted tags that were opened but never closed.
  #
  # NOTE: attribute values are copied through verbatim. URL schemes are not
  # inspected, so e.g. href="javascript:..." survives whenever +href+ is
  # whitelisted. Text outside of tags is not entity-escaped either.
  #
  # @param html [String, nil] markup to clean
  # @param okTags [String] comma-separated whitelist; each entry is a tag name
  #   optionally followed by space-separated attribute names ('a href, b')
  # @return [String] the cleaned markup ('' for nil input)
  def self.sanitize(html, okTags = 'a href, b, br, i, em, strong')
    return '' if html.nil?

    allowed = parse_allowed_tags(okTags)
    open_tags = []

    # Visit every <...> element; the block result replaces the element.
    result = html.gsub(/<.*?>/m) do |element|
      if (closing = element.match(%r{\A</(\w+)}))
        close_element(closing[1].downcase, allowed, open_tags)
      elsif (solo = element.match(%r{\A<(\w+)\s*/>}))
        tag = solo[1].downcase
        allowed.key?(tag) ? "<#{tag} />" : ''
      elsif (opening = element.match(/\A<(\w+)/))
        open_element(opening[1].downcase, opening.post_match, allowed, open_tags)
      else
        '' # not recognizable as a tag at all
      end
    end

    # Drop unmatched '>' characters that appear before the first '<'.
    head, first_open, rest = result.partition('<')
    result = head.delete('>') + first_open + rest

    # Drop unmatched '<' characters that appear after the last '>'.
    body, last_close, tail = result.rpartition('>')
    result = body + last_close + tail.delete('<')

    # Close whatever is still open, innermost tag first.
    result + open_tags.reverse.map { |tag| "</#{tag}>" }.join
  end

  # Builds a hash of tag name => array of allowed attribute names from a
  # whitelist string such as 'a href, b, br'.
  def self.parse_allowed_tags(spec)
    spec.downcase.split(',').each_with_object({}) do |entry, allowed|
      name, *attributes = entry.split(' ')
      allowed[name] = attributes if name
    end
  end

  # Handles a closing tag: when +tag+ is whitelisted and currently open, pops
  # the stack down to and including it and returns the closing tags for
  # everything popped; otherwise returns ''.
  def self.close_element(tag, allowed, open_tags)
    return '' unless allowed.key?(tag) && open_tags.include?(tag)

    # Close from the innermost open tag down to the most recent +tag+.
    unwound = open_tags.slice!(open_tags.rindex(tag)..-1)
    unwound.reverse.map { |name| "</#{name}>" }.join
  end

  # Handles an opening tag: returns it rebuilt with only its whitelisted
  # attributes, or '' when the tag itself is not whitelisted. +rest+ is the
  # element text that follows the tag name.
  def self.open_element(tag, rest, allowed, open_tags)
    return '' unless allowed.key?(tag)

    open_tags.push(tag) unless SOLO_TAGS.include?(tag)

    permitted = allowed[tag]
    return "<#{tag}>" if permitted.empty?

    attributes = rest.scan(ATTRIBUTE_PATTERN).map do |name, value|
      name = name.downcase
      permitted.include?(name) ? " #{name}=#{value}" : ''
    end
    "<#{tag}#{attributes.join}>"
  end

  private_class_method :parse_allowed_tags, :close_element, :open_element
end