Canhkaka's archive
convert có dấu thành không dấu trong python

# Strip Vietnamese diacritics from the text held in ``rt``: each accented
# lowercase letter listed below is replaced by its plain ASCII base letter.
# The breve family (ă ...) and đ are included so that no lowercase Vietnamese
# letter is left accented.
# NOTE(review): only precomposed lowercase letters are handled; uppercase
# accented letters pass through unchanged -- confirm that is intended.
_UNACCENT_GROUPS = (
    (u"a", u"ảãạàáâẩẫậầấăẳẵặằắ"),
    (u"e", u"ẻẽẹèéêểễệềế"),
    (u"i", u"ỉĩịìí"),
    (u"o", u"ỏõọòóôổỗộồốơởỡợờớ"),
    (u"u", u"ủũụùúưửữựừứ"),
    (u"y", u"ỷỹỵỳý"),
    (u"d", u"đ"),
)
for _base, _accented in _UNACCENT_GROUPS:
    for _char in _accented:
        rt = rt.replace(_char, _base)

Tiến thoái lưỡng nan, đi về lận đận…

Giving a second chance is okay. But giving third chances is stupid.
HTML encoding of foreign language characters

#written by canhkaka

class myhtmllib:
    """Decode a fixed set of HTML character references into plain characters.

    NOTE(review): the published listing showed most named entities already
    rendered as the characters they stand for, so the entity names in
    ``_NAMED_ENTITIES`` were reconstructed from the replacement characters.
    The ``&Acirc;``, ``&nbsp;``, ``&ndash;``, ``&raquo;`` and ``&gt;`` rows
    came from garbled lines and should be confirmed against the original.
    """

    # (entity, replacement) pairs, applied in order by HTMLreplace.
    _NAMED_ENTITIES = (
        # letters carrying a single diacritic
        ("&aacute;", "á"),
        ("&agrave;", "à"),
        ("&Aacute;", "Á"),
        ("&Agrave;", "À"),
        ("&atilde;", "ã"),
        ("&Atilde;", "Ã"),
        ("&acirc;", "â"),
        ("&Acirc;", "Â"),
        ("&eacute;", "é"),
        ("&egrave;", "è"),
        ("&Eacute;", "É"),
        ("&Egrave;", "È"),
        ("&Etilde;", "Ẽ"),
        ("&ecirc;", "ê"),
        ("&Ecirc;", "Ê"),
        ("&iacute;", "í"),
        ("&igrave;", "ì"),
        ("&Iacute;", "Í"),
        ("&Igrave;", "Ì"),
        ("&itilde;", "ĩ"),
        ("&Itilde;", "Ĩ"),
        ("&ograve;", "ò"),
        ("&Ograve;", "Ò"),
        ("&oacute;", "ó"),
        ("&Oacute;", "Ó"),
        ("&otilde;", "õ"),
        ("&Otilde;", "Õ"),
        ("&ocirc;", "ô"),
        ("&Ocirc;", "Ô"),
        ("&uacute;", "ú"),
        ("&ugrave;", "ù"),
        ("&Uacute;", "Ú"),
        ("&Ugrave;", "Ù"),
        ("&utilde;", "ũ"),
        ("&Utilde;", "Ũ"),
        ("&yacute;", "ý"),
        ("&ygrave;", "ỳ"),
        ("&Yacute;", "Ý"),
        ("&Ygrave;", "Ỳ"),
        ("&ytilde;", "ỹ"),
        ("&Ytilde;", "Ỹ"),
        # special characters
        ("&quot;", '"'),
        ("&lsquo;", "‘"),
        ("&rsquo;", "’"),
        ("&hellip;", "…"),
        ("&nbsp;", " "),
        ("&ndash;", "–"),
        ("&raquo;", "»"),
        ("&gt;", ">"),
    )

    # Decimal character references, written without the trailing ';'.
    # Most map to an unaccented ASCII letter rather than the real character.
    _NUMERIC_REFS = (
        ("&#261", "a"),
        ("&#262", "C"),
        ("&#263", "c"),
        ("&#280", "E"),
        ("&#281", "e"),
        ("&#321", "L"),
        ("&#322", "l"),
        ("&#323", "N"),
        ("&#324", "n"),
        ("&#211", "Ó"),
        ("&#243", "ó"),
        ("&#346", "S"),
        ("&#347", "s"),
        ("&#379", "Z"),
        ("&#380", "z"),
    )

    def __init__(self):
        """Create a decoder; the instance carries no state."""
        pass

    def HTMLreplace(self, rt):
        """Return *rt* with the known character references decoded.

        Args:
            rt: Text that may contain HTML character references.

        Returns:
            A copy of *rt* in which every reference listed in
            ``_NAMED_ENTITIES`` and ``_NUMERIC_REFS`` has been replaced by
            its mapped text. Anything else is left untouched.
        """
        for entity, char in self._NAMED_ENTITIES:
            rt = rt.replace(entity, char)
        for ref, char in self._NUMERIC_REFS:
            # Consume the terminated form first so "&#261;" does not leave a
            # stray ';' behind, then the bare form.
            rt = rt.replace(ref + ";", char).replace(ref, char)
        return rt

Encoding a non-ASCII character using URLencode

#Written by Canhkaka
# -*- coding: utf-8 -*-
def HTMLencode(x):
    """Return *x* with every character above code point 255 written as ``&#N;``.

    Characters whose code point is 255 or lower are copied unchanged; any
    other character is replaced by its decimal numeric character reference.

    Args:
        x: The text to encode.

    Returns:
        The encoded text; an empty input yields an empty string.
    """
    return "".join(
        "&#" + str(ord(ch)) + ";" if ord(ch) > 255 else ch
        for ch in x
    )
def needDecode(x):
    """Return 1 if the single character *x* has to be percent-escaped, else 0.

    A character is flagged when its code point is 127 or higher, or when it
    is one of ``&``, ``#`` or ``;``.

    Args:
        x: A one-character string.

    Returns:
        1 when the character must be escaped, 0 otherwise.
    """
    if ord(x) >= 127 or x in ("&", "#", ";"):
        return 1
    return 0
                
                               
def URLencode(x):
    """Return *x* encoded for use in a URL query string.

    The text is first passed through ``HTMLencode``, spaces become ``+``,
    and every character flagged by ``needDecode`` is written as ``%`` plus
    its lowercase hexadecimal code point. All other characters, including a
    literal ``+`` or ``%`` in the input, are copied unchanged.

    Args:
        x: The text to encode.

    Returns:
        The encoded string.
    """
    encoded = HTMLencode(x).replace(" ", "+")
    return "".join(
        "%" + hex(ord(ch))[2:] if needDecode(ch) else ch
        for ch in encoded
    )
import urllib

# Demo: encode a sample Vietnamese phrase with URLencode.
# The u'' literal yields the same text object as decoding the UTF-8 source
# bytes, and works on Python 2 and Python 3.3+ alike.
query = u'Cộng hòa xã hội chủ ngh ĩa đĩa thịt gà'
query = URLencode(query)