|  | @@ -1,5 +1,5 @@
 | 
	
		
			
			| 1 | 1 |  from HTMLParser import HTMLParser
 | 
	
		
			
			| 2 |  | -import htmlentitydefs
 | 
	
		
			
			|  | 2 | +#import htmlentitydefs
 | 
	
		
			
			| 3 | 3 |  import csv
 | 
	
		
			
			| 4 | 4 |  import codecs
 | 
	
		
			
			| 5 | 5 |  import cStringIO
 | 
	
	
		
			
			|  | @@ -17,8 +17,9 @@ class HTMLTextExtractor(HTMLParser):
 | 
	
		
			
			| 17 | 17 |          self.result.append(unichr(codepoint))
 | 
	
		
			
			| 18 | 18 |  
 | 
	
		
			
			| 19 | 19 |      def handle_entityref(self, name):
 | 
	
		
			
			| 20 |  | -        codepoint = htmlentitydefs.name2codepoint[name]
 | 
	
		
			
			| 21 |  | -        self.result.append(unichr(codepoint))
 | 
	
		
			
			|  | 20 | +        #codepoint = htmlentitydefs.name2codepoint[name]
 | 
	
		
			
			|  | 21 | +        #self.result.append(unichr(codepoint))
 | 
	
		
			
			|  | 22 | +        self.result.append(name)
 | 
	
		
			
			| 22 | 23 |  
 | 
	
		
			
			| 23 | 24 |      def get_text(self):
 | 
	
		
			
			| 24 | 25 |          return u''.join(self.result)
 |