Skip to content

Commit 7b3e322

Browse files
committed
2 parents c1f0010 + ea31c14 commit 7b3e322

File tree

3 files changed

+45
-5
lines changed

3 files changed

+45
-5
lines changed

README

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ DESCRIPTION
3636
REQUIREMENTS
3737

3838
Nothing except Python itself.
39+
40+
You will need a Python build with "wide" Unicode characters in order
41+
for unidecode to work correctly with characters outside of the Basic
42+
Multilingual Plane. Surrogate pair encoding of "narrow" builds is not
43+
supported.
3944

4045

4146
INSTALLATION
@@ -58,20 +63,20 @@ AVAILABILITY
5863
The latest version of Unidecode is available from the Git
5964
repository at
6065

61-
http://code.zemanta.com/tsolc/git/wikiprep
66+
http://code.zemanta.com/tsolc/git/unidecode
6267

6368
You can get it by running:
6469

65-
git clone http://code.zemanta.com/tsolc/git/wikiprep
70+
git clone http://code.zemanta.com/tsolc/git/unidecode
6671

6772

6873
COPYRIGHT
6974

70-
Character transliteration tables:
75+
Original character transliteration tables:
7176

7277
Copyright 2001, Sean M. Burke <[email protected]>, all rights reserved.
7378

74-
Python code:
79+
Python code and later additions:
7580

7681
Copyright 2011, Tomaz Solc <[email protected]>
7782

tests/basic_2.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,21 @@
11
# -*- coding: utf-8 -*-
22
import unittest
3+
import sys
34
from unidecode import unidecode
45

6+
# Workaround for Python < 2.7, where unittest.skipIf is not available
7+
if not hasattr(unittest, 'skipIf'):
8+
def skipIf(condition, reason):
9+
def d(f):
10+
def df(*args):
11+
if condition:
12+
print "skipped %r" % (reason,)
13+
else:
14+
return f(*args)
15+
return df
16+
return d
17+
unittest.skipIf = skipIf
18+
519
class TestUnidecode(unittest.TestCase):
620
def test_ascii(self):
721
for n in xrange(0,128):
@@ -23,6 +37,7 @@ def test_circled_latin(self):
2337

2438
self.failUnlessEqual(b, a)
2539

40+
@unittest.skipIf(sys.maxunicode < 0x10000, "narrow build")
2641
def test_mathematical_latin(self):
2742
# 13 consecutive sequences of A-Z, a-z with some codepoints
2843
# undefined. We just count the undefined ones and don't check
@@ -42,6 +57,7 @@ def test_mathematical_latin(self):
4257

4358
self.failUnlessEqual(empty, 24)
4459

60+
@unittest.skipIf(sys.maxunicode < 0x10000, "narrow build")
4561
def test_mathematical_digits(self):
4662
# 5 consecutive sequences of 0-9
4763
for n in xrange(0x1d7ce, 0x1d800):
@@ -97,7 +113,15 @@ def test_specific(self):
97113
# Table that has less than 256 entries
98114
(u'\u1eff',
99115
''),
116+
]
100117

118+
for input, output in TESTS:
119+
self.failUnlessEqual(unidecode(input), output)
120+
121+
@unittest.skipIf(sys.maxunicode < 0x10000, "narrow build")
122+
def test_specific_wide(self):
123+
124+
TESTS = [
101125
# Non-BMP character
102126
(u'\U0001d5a0',
103127
'A'),

tests/basic_3.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# -*- coding: utf-8 -*-
22
import unittest
3+
import sys
34
from unidecode import unidecode
45

56
class TestUnidecode(unittest.TestCase):
@@ -16,12 +17,13 @@ def test_bmp(self):
1617

1718
def test_circled_latin(self):
1819
# 1 sequence of a-z
19-
for n in xrange(0, 26):
20+
for n in range(0, 26):
2021
a = chr(ord('a') + n)
2122
b = unidecode(chr(0x24d0 + n))
2223

2324
self.failUnlessEqual(b, a)
2425

26+
@unittest.skipIf(sys.maxunicode < 0x10000, "narrow build")
2527
def test_mathematical_latin(self):
2628
# 13 consecutive sequences of A-Z, a-z with some codepoints
2729
# undefined. We just count the undefined ones and don't check
@@ -41,6 +43,7 @@ def test_mathematical_latin(self):
4143

4244
self.failUnlessEqual(empty, 24)
4345

46+
@unittest.skipIf(sys.maxunicode < 0x10000, "narrow build")
4447
def test_mathematical_digits(self):
4548
# 5 consecutive sequences of 0-9
4649
for n in range(0x1d7ce, 0x1d800):
@@ -96,7 +99,15 @@ def test_specific(self):
9699
# Table that has less than 256 entries
97100
('\u1eff',
98101
''),
102+
]
103+
104+
for instr, output in TESTS:
105+
self.failUnlessEqual(unidecode(instr), output)
106+
107+
@unittest.skipIf(sys.maxunicode < 0x10000, "narrow build")
108+
def test_specific_wide(self):
99109

110+
TESTS = [
100111
# Non-BMP character
101112
('\U0001d5a0',
102113
'A'),

0 commit comments

Comments
 (0)