Add a helper script in order to clean a CSV file into a compatible format
This commit is contained in:
parent
8ed5054d25
commit
d7ef85fd30
1 changed file with 41 additions and 0 deletions
41
scripts/cleanWordCSV.py
Normal file
41
scripts/cleanWordCSV.py
Normal file
|
|
@ -0,0 +1,41 @@
|
||||||
|
import re
|
||||||
|
import csv
|
||||||
|
|
||||||
|
def isWord(w: str) -> bool:
    """
    Checks if a word is considered valid for the purposes of Chatman

    A word is accepted when all of the following hold:

    * it is not empty,
    * twice its length does not exceed 500 (i.e. at most 250 characters),
    * at least 50% of its characters are ASCII letters or hyphens.

    :param w: the candidate word to check
    :return: True if the word passes every check, False otherwise
    """
    # An empty string has nothing to judge; rejecting it up front also keeps
    # the ratio check below from dividing by zero.
    if not w:
        return False

    if len(w) * 2 > 500:
        return False

    # Count the characters that are ASCII letters or a hyphen in one pass.
    character_count = len(re.findall(r"[A-Za-z\-]", w))

    # Cross-multiply instead of dividing: floor division would truncate the
    # ratio to 0 for any word that is not made up entirely of valid
    # characters, turning the 50% threshold into a 100% one.
    return character_count * 100 >= 50 * len(w)
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """
    Interactively cleans a CSV file into a plain word list

    Prompts for an input CSV path and an output path, takes the first column
    of every non-empty row, strips surrounding whitespace, keeps the entries
    accepted by isWord, and writes them to the output file separated by
    newlines (no trailing newline is written).
    """
    filename = input("Input File: ")
    output_file = input("Output File: ")

    # newline="" hands line-ending handling to the csv module, which is what
    # it expects in order to parse quoted fields containing line breaks.
    with open(filename, "r", newline="") as csv_file:
        # Skip empty rows, which have no first column to read.
        candidates = (row[0].strip() for row in csv.reader(csv_file) if row)
        final_words = [word for word in candidates if isWord(word)]

    # The file is only written, never read back, so plain "w" is enough.
    with open(output_file, "w") as output:
        output.write("\n".join(final_words))
|
||||||
|
|
||||||
|
|
||||||
|
# Run the interactive cleaner only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||||
Loading…
Add table
Add a link
Reference in a new issue