Add a helper script in order to clean a CSV file into a compatible format
This commit is contained in:
parent
8ed5054d25
commit
d7ef85fd30
1 changed file with 41 additions and 0 deletions
41
scripts/cleanWordCSV.py
Normal file
41
scripts/cleanWordCSV.py
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
import re
|
||||
import csv
|
||||
|
||||
def isWord(w: str) -> bool:
    """
    Checks if a word is considered valid for the purposes of Chatman

    A word is accepted when it is non-empty, at most 250 characters long,
    and at least half of its characters are ASCII letters or hyphens.

    :param w: the candidate word to check
    :return: True if the word passes all checks, False otherwise
    """
    # An empty string is never a word; returning early also keeps the
    # percentage computation below from dividing by zero.
    if not w:
        return False

    if len(w) * 2 > 500:
        return False

    # Count the characters that are ASCII letters or a hyphen.
    character_count = len(re.findall(r"[A-Za-z\-]", w))

    # Multiply before the integer division so the percentage is not floored
    # to 0 for every word that contains at least one other character.
    if character_count * 100 // len(w) < 50:
        return False

    return True
|
||||
|
||||
|
||||
def main():
    """
    Prompts for an input CSV path and an output path, then writes every
    valid word found in the first column of the CSV to the output file,
    one word per line.

    Empty rows are skipped. Each first-column value is stripped of
    surrounding whitespace and kept only if isWord() accepts it.
    """
    filename = input("Input File: ")
    output_file = input("Output File: ")
    final_words = []

    # newline="" hands raw line endings to the csv module, as its
    # documentation requires, so quoted fields containing newlines are
    # parsed correctly.
    with open(filename, "r", newline="") as csv_file:
        for row in csv.reader(csv_file):
            if not row:
                continue
            word = row[0].strip()
            if isWord(word):
                final_words.append(word)

    # The file is only written, never read back, so plain "w" is enough.
    with open(output_file, "w") as output:
        output.write("\n".join(final_words))
|
||||
|
||||
|
||||
# Run the interactive cleaner only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue