hooks.add_entity: Better handle common name prefix parts.
Keep prefixes such as "da", "de", and "van" with the name they're attached to, rather than breaking the name in the middle.
This commit is contained in:
		
							parent
							
								
									f0dafcaed9
								
							
						
					
					
						commit
						f888b13c56
					
				
					 2 changed files with 10 additions and 0 deletions
				
			
		| 
						 | 
				
			
			@ -2,6 +2,11 @@ import re
 | 
			
		|||
import unicodedata
 | 
			
		||||
 | 
			
		||||
class AddEntityHook:
 | 
			
		||||
    NAME_PREFIXES = frozenset([
 | 
			
		||||
        'da',
 | 
			
		||||
        'de',
 | 
			
		||||
        'van',
 | 
			
		||||
    ])
 | 
			
		||||
    NONASCII_RE = re.compile(r'[^-A-Za-z0-9]')
 | 
			
		||||
    NONALNUM_RE = re.compile(r'[^-\w]')
 | 
			
		||||
    OPEN_PARENS = ['\\(', '\\[', '\\{']
 | 
			
		||||
| 
						 | 
				
			
			@ -41,6 +46,8 @@ class AddEntityHook:
 | 
			
		|||
        if not parts:
 | 
			
		||||
            return ''
 | 
			
		||||
        parts.insert(0, parts.pop())
 | 
			
		||||
        if parts[-1].lower() in self.NAME_PREFIXES:
 | 
			
		||||
            parts.insert(0, parts.pop())
 | 
			
		||||
        return '-'.join(parts)
 | 
			
		||||
 | 
			
		||||
    def run(self, data):
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -19,6 +19,9 @@ def test_load_all():
 | 
			
		|||
    ('Alex(Nickname) Smith', 'Smith-Alex'),
 | 
			
		||||
    ('稲荷', '稲荷'),
 | 
			
		||||
    ('Pøweł', 'Powel'),
 | 
			
		||||
    ('Elyse Jan Smith', 'Smith-Elyse-Jan'),
 | 
			
		||||
    ('Jan van Smith', 'van-Smith-Jan'),
 | 
			
		||||
    ('Francis da Silva', 'da-Silva-Francis'),
 | 
			
		||||
])
 | 
			
		||||
def test_add_entity(payee, expected):
 | 
			
		||||
    data = {'payee': payee}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		
		Reference in a new issue