reconciler: Move other score thresholds to constants
This commit is contained in:
		
							parent
							
								
									baa299c4c5
								
							
						
					
					
						commit
						86f4232df1
					
				
					 1 changed files with 14 additions and 10 deletions
				
			
		|  | @ -152,8 +152,10 @@ JUNK_WORDS = [ | ||||||
| ] | ] | ||||||
| JUNK_WORDS_RES = [re.compile(word, re.IGNORECASE) for word in JUNK_WORDS] | JUNK_WORDS_RES = [re.compile(word, re.IGNORECASE) for word in JUNK_WORDS] | ||||||
| ZERO_RE = re.compile('^0+') | ZERO_RE = re.compile('^0+') | ||||||
| FULL_MATCH_THRESHOLD = 0.8 | PAYEE_FULL_MATCH_THRESHOLD = 0.8 | ||||||
| PARTIAL_MATCH_THRESHOLD = 0.4 | PAYEE_PARTIAL_MATCH_THRESHOLD = 0.4 | ||||||
|  | OVERALL_EXCELLENT_MATCH_THRESHOLD = 0.8  # Clear winner | ||||||
|  | OVERALL_ACCEPTABLE_MATCH_THRESHOLD = 0.5  # Acceptable if only one match found | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def remove_duplicate_words(text: str) -> str: | def remove_duplicate_words(text: str) -> str: | ||||||
|  | @ -392,9 +394,9 @@ def records_match(r1: Dict, r2: Dict) -> Tuple[float, List[str]]: | ||||||
|     else: |     else: | ||||||
|         check_score = 0.0 |         check_score = 0.0 | ||||||
|         payee_score = payee_match(r1['payee'], r2['payee']) |         payee_score = payee_match(r1['payee'], r2['payee']) | ||||||
|         if payee_score > FULL_MATCH_THRESHOLD: |         if payee_score > PAYEE_FULL_MATCH_THRESHOLD: | ||||||
|             payee_message = '' |             payee_message = '' | ||||||
|         elif payee_score > PARTIAL_MATCH_THRESHOLD: |         elif payee_score > PAYEE_PARTIAL_MATCH_THRESHOLD: | ||||||
|             payee_message = 'partial payee match' |             payee_message = 'partial payee match' | ||||||
|         else: |         else: | ||||||
|             payee_message = 'payee mismatch' |             payee_message = 'payee mismatch' | ||||||
|  | @ -435,16 +437,16 @@ def match_statement_and_books( | ||||||
|         matches_found = 0 |         matches_found = 0 | ||||||
|         for i, r2 in enumerate(books_trans): |         for i, r2 in enumerate(books_trans): | ||||||
|             score, note = records_match(r1, r2) |             score, note = records_match(r1, r2) | ||||||
|             if score >= 0.5 and score >= best_match_score: |             if score >= OVERALL_ACCEPTABLE_MATCH_THRESHOLD and score >= best_match_score: | ||||||
|                 matches_found += 1 |                 matches_found += 1 | ||||||
|                 best_match_score = score |                 best_match_score = score | ||||||
|                 best_match_index = i |                 best_match_index = i | ||||||
|                 best_match_note = note |                 best_match_note = note | ||||||
|         if ( |         if ( | ||||||
|             best_match_score > 0.5 |             best_match_score > OVERALL_ACCEPTABLE_MATCH_THRESHOLD | ||||||
|             and matches_found == 1 |             and matches_found == 1 | ||||||
|             and 'check-id mismatch' not in best_match_note |             and 'check-id mismatch' not in best_match_note | ||||||
|             or best_match_score > 0.8 |             or best_match_score > OVERALL_EXCELLENT_MATCH_THRESHOLD | ||||||
|         ): |         ): | ||||||
|             matches.append(([r1], [books_trans[best_match_index]], best_match_note)) |             matches.append(([r1], [books_trans[best_match_index]], best_match_note)) | ||||||
|             # Don't try to make a second match against this books entry. |             # Don't try to make a second match against this books entry. | ||||||
|  | @ -484,16 +486,16 @@ def subset_match( | ||||||
|         r2['amount'] = total |         r2['amount'] = total | ||||||
|         for i, r1 in enumerate(statement_trans): |         for i, r1 in enumerate(statement_trans): | ||||||
|             score, note = records_match(r1, r2) |             score, note = records_match(r1, r2) | ||||||
|             if score >= 0.5 and score >= best_match_score: |             if score >= OVERALL_ACCEPTABLE_MATCH_THRESHOLD and score >= best_match_score: | ||||||
|                 matches_found += 1 |                 matches_found += 1 | ||||||
|                 best_match_score = score |                 best_match_score = score | ||||||
|                 best_match_index = i |                 best_match_index = i | ||||||
|                 best_match_note = note |                 best_match_note = note | ||||||
|         if ( |         if ( | ||||||
|             best_match_score > 0.5 |             best_match_score > OVERALL_ACCEPTABLE_MATCH_THRESHOLD | ||||||
|             and matches_found == 1 |             and matches_found == 1 | ||||||
|             and 'check-id mismatch' not in best_match_note |             and 'check-id mismatch' not in best_match_note | ||||||
|             or best_match_score > 0.8 |             or best_match_score > OVERALL_EXCELLENT_MATCH_THRESHOLD | ||||||
|         ): |         ): | ||||||
|             matches.append( |             matches.append( | ||||||
|                 ([statement_trans[best_match_index]], group_items, best_match_note) |                 ([statement_trans[best_match_index]], group_items, best_match_note) | ||||||
|  | @ -795,6 +797,8 @@ def main( | ||||||
|         statement_trans = read_csv(f) |         statement_trans = read_csv(f) | ||||||
| 
 | 
 | ||||||
|     # Dates are taken from the beginning/end of the statement. |     # Dates are taken from the beginning/end of the statement. | ||||||
|  |     # TODO: FR statements include the last day of the previous statement and the | ||||||
|  |     # last day of this statement in the first and last rows, respectively. | ||||||
|     begin_date = statement_trans[0]['date'] |     begin_date = statement_trans[0]['date'] | ||||||
|     end_date = statement_trans[-1]['date'] |     end_date = statement_trans[-1]['date'] | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		
		Reference in a new issue