# Replace accented lowercase Vietnamese vowels in ``rt`` with their plain
# ASCII base letter.  Only the lowercase letters listed below are handled.
# NOTE(review): ``rt`` is not assigned anywhere in the visible part of the
# file; it is presumably a unicode string set up before this point -- confirm.
# The original lines used typographic quotes, which Python cannot parse; the
# table below performs the same replacements in the same order.
for _plain, _accented in (
    ("a", u"ả ã ạ à á â ẩ ẫ ậ ầ ấ"),
    ("e", u"ẻ ẽ ẹ è é ê ể ễ ệ ề ế"),
    ("i", u"ỉ ĩ ị ì í"),
    ("o", u"ỏ õ ọ ò ó ô ổ ỗ ộ ồ ố ơ ở ỡ ợ ờ ớ"),
    ("u", u"ủ ũ ụ ù ú ư ử ữ ự ừ ứ"),
    ("y", u"ỷ ỹ ỵ ỳ ý"),
):
    # split() keeps every accented letter as one unit.
    for _letter in _accented.split():
        rt = rt.replace(_letter, _plain)
# Written by canhkaka
class myhtmllib:
    """Turn a fixed list of HTML character references into plain characters.

    NOTE(review): this file was received with its string literals damaged
    (typographic quotes, and the entity on the left of most ``replace``
    calls already decoded, so both sides showed the same character).  The
    entity names in ``_ENTITY_REPLACEMENTS`` were reconstructed from the
    visible characters, following the pattern of the entries that survived
    intact (``&Etilde;``, ``&ygrave;``, ``&Ygrave;``, ``&ytilde;``,
    ``&Ytilde;``).  The replacement on the right is kept as it appeared.
    Please confirm the table against the original script.
    """

    # (entity, replacement) pairs, applied in this order by HTMLreplace.
    _ENTITY_REPLACEMENTS = (
        # Letters with one diacritic mark.
        ("&aacute;", "á"),
        ("&agrave;", "à"),
        ("&Aacute;", "Á"),
        ("&Agrave;", "À"),
        ("&atilde;", "ã"),
        # NOTE(review): this entry was garbled in the source; "&Atilde;" is
        # inferred from the preceding "&atilde;" entry -- confirm.
        ("&Atilde;", "Ã"),
        ("&acirc;", "â"),
        # NOTE(review): both strings of this entry were empty in the source;
        # "&Acirc;" is inferred from the preceding "&acirc;" entry -- confirm.
        ("&Acirc;", "Â"),
        ("&eacute;", "é"),
        ("&egrave;", "è"),
        ("&Eacute;", "É"),
        ("&Egrave;", "È"),
        ("&Etilde;", "Ẽ"),
        ("&ecirc;", "ê"),
        ("&Ecirc;", "Ê"),
        ("&iacute;", "í"),
        ("&igrave;", "ì"),
        ("&Iacute;", "Í"),
        ("&Igrave;", "Ì"),
        ("&itilde;", "ĩ"),
        ("&Itilde;", "Ĩ"),
        ("&ograve;", "ò"),
        ("&Ograve;", "Ò"),
        ("&oacute;", "ó"),
        ("&Oacute;", "Ó"),
        ("&otilde;", "õ"),
        ("&Otilde;", "Õ"),
        ("&ocirc;", "ô"),
        ("&Ocirc;", "Ô"),
        ("&uacute;", "ú"),
        ("&ugrave;", "ù"),
        ("&Uacute;", "Ú"),
        ("&Ugrave;", "Ù"),
        ("&utilde;", "ũ"),
        ("&Utilde;", "Ũ"),
        ("&yacute;", "ý"),
        ("&ygrave;", "ỳ"),
        ("&Yacute;", "Ý"),
        ("&Ygrave;", "Ỳ"),
        ("&ytilde;", "ỹ"),
        ("&Ytilde;", "Ỹ"),
        # Special characters.
        ("&quot;", '"'),
        ("&lsquo;", "‘"),
        ("&rsquo;", "’"),
        ("&hellip;", "…"),
        # NOTE(review): the replacement was garbled in the source and looks
        # like an empty string, so non-breaking spaces are removed -- confirm.
        ("&nbsp;", ""),
        # NOTE(review): the next two targets are kept exactly as listed in
        # the source, although "»" for a dash and "–" for ">" look swapped
        # or shifted -- confirm.
        ("&ndash;", "»"),
        ("&gt;", "–"),
        # Numeric character references (Polish letters); decimal form is
        # assumed -- confirm.  Most are reduced to their ASCII base letter.
        ("&#261;", "a"),
        ("&#262;", "C"),
        ("&#263;", "c"),
        ("&#280;", "E"),
        ("&#281;", "e"),
        ("&#321;", "L"),
        ("&#322;", "l"),
        ("&#323;", "N"),
        ("&#324;", "n"),
        ("&#211;", "Ó"),
        ("&#243;", "ó"),
        ("&#346;", "S"),
        ("&#347;", "s"),
        ("&#379;", "Z"),
        ("&#380;", "z"),
        # The source listed the last two references a second time, mapped to
        # "Ź" and "ź".  Those entries could never match because the pair
        # above had already consumed the text, so they are omitted.
        # NOTE(review): possibly "&#377;" / "&#378;" were intended.
    )

    def __init__(self):
        # Nothing to set up; the class holds no per-instance state.
        pass

    def HTMLreplace(self, rt):
        """Return ``rt`` with every listed entity replaced by its character.

        The pairs in ``_ENTITY_REPLACEMENTS`` are applied one after another
        with ``str.replace``; matching is case-sensitive and text that
        contains none of the entities is returned unchanged.

        Args:
            rt: Text that may contain HTML character references.

        Returns:
            The text after all replacements.
        """
        for entity, replacement in self._ENTITY_REPLACEMENTS:
            rt = rt.replace(entity, replacement)
        return rt
# Written by Canhkaka
# -*- coding: utf-8 -*-
def HTMLencode(x):
    """Return ``x`` with every character above code point 255 as ``&#N;``.

    Characters whose code point is 255 or lower are copied unchanged.  Any
    other character is written as ``&#`` followed by its decimal code point
    and ``;``.

    Args:
        x: Text to encode; processed one character at a time.

    Returns:
        The encoded text (an empty string for empty input).
    """
    return "".join(
        "&#" + str(ord(ch)) + ";" if ord(ch) > 255 else ch
        for ch in x
    )
def needDecode(x):
    """Tell whether the single character ``x`` has to be percent-escaped.

    Args:
        x: A one-character string (``ord`` is applied to it).

    Returns:
        1 when the code point of ``x`` is 127 or higher, or when ``x`` is
        one of ``&``, ``#`` or ``;``; otherwise 0.
    """
    if ord(x) >= 127 or x in ("&", "#", ";"):
        return 1
    return 0
def URLencode(x):
    """Encode ``x`` for use in a URL query string.

    Steps:
      1. ``HTMLencode`` rewrites characters above code point 255 as ``&#N;``.
      2. Every space becomes ``+``.
      3. Each character flagged by ``needDecode`` (code point 127 or higher,
         or one of ``&``, ``#``, ``;``) becomes ``%`` followed by its code
         point in lowercase hexadecimal.

    For example ``"Cộng hòa"`` becomes ``"C%26%237897%3bng+h%f2a"``.

    NOTE(review): no other characters are escaped, so a literal ``+`` or
    ``%`` in the input passes through unchanged.  Left as is because the
    receiving side may depend on this exact format.

    Args:
        x: Text to encode.

    Returns:
        The encoded text.
    """
    encoded = HTMLencode(x).replace(' ', '+')
    return "".join(
        '%' + hex(ord(ch))[2:] if needDecode(ch) else ch
        for ch in encoded
    )
import urllib
# Example: percent-encode a Vietnamese phrase with URLencode.
# A u'' literal replaces the former unicode('...', 'utf-8') call: it yields
# the same text in a UTF-8 source file and also runs where the ``unicode``
# builtin does not exist.
query = u'Cộng hòa xã hội chủ ngh ĩa đĩa thịt gà'
query = URLencode(query)
