# Sample glossary definitions that become the first input file.
acro1 = """
\\newabbreviation{abba}{ABBA}{Björn & Benny, Agnetha & Frida}
\\newabbreviation{unsc}{UNSC}{United Nations Space Command}
\\newabbreviation{odst}{ODST}{Orbital Drop Shock Trooper}
\\newabbreviation{fish}{FISH}{F' It, Stuff Happens}
\\newabbreviation{gps}{GPS}{Go Pound Sound}
\\newabbreviation{evil}{EVIL}{Every Villian is Lemon}
\\newabbreviation{otr}{OTR}{over the rainbow}
\\newabbreviation{sc}{SC}{Snack Club}
\\newabbreviation{mo}{MO}{modus operandi}
\\newabbreviation{ul}{UL}{ultralight}
\\newabbreviation{blt}{BLT}{bacon lettuce tomato}
"""

# Sample glossary definitions that become the second input file; some ids and
# long forms deliberately overlap with the first sample.
acro2 = """
\\newabbreviation{abba}{ABBA}{Björn & Benny, Agnetha & Frida}
\\newabbreviation{otr}{OTR}{Optimal Test Ruminant}
\\newabbreviation{pre}{PRE}{Prototype Ruminant Evaluation}
\\newabbreviation[longplural="Ruminants Under Test"]{rut}{RUT}{Ruminant Under Test}
\\newabbreviation{EVIL}{EVIL}{Every Villian is Lemon}
\\newabbreviation{irbh}{IRBH}{I'd Rather Be Hiking!}
\\newabbreviation{hh}{HH}{hobbit head}
\\newabbreviation{gps}{GPS}{Go Pound Sound}
\\newabbreviation{crud}{CRUD}{create, read, update, and delete}
"""

# Input files processed by the rest of the script
# (pretending we didn't just create these files).
files = ['Acronyms1.tex','Acronyms2.tex']

# Write each sample string to its matching file name.
for fn, content in zip(files, (acro1, acro2)):
    with open(fn, 'w') as f:
        f.write(content)


import regex as re
# Matches one glossaries entry of the form
#   \newabbreviation[<options>]{<id>}{<short>}{<long>}
# Capture groups: (1) the optional "[...]" argument including its brackets
# (empty when absent), (2) the id, (3) the short form, (4) the long form.
# After the closing brace the entry may be followed by blanks and a "%"
# comment, and must then end at a newline or at the end of the text. This keeps
# entries on a final line without a trailing newline, and entries carrying a
# trailing comment (such as a " %%%%% DUPLICATE" flag), from being skipped.
pattern = re.compile(
    r'\\newabbreviation(\[.*?\])?{(.*?)}{(.*?)}{(.*?)}'
    r'[ \t]*(?:%[^\n]*)?(?:\n|\Z)'
)


# Collect the entries of every input file. Each match is a tuple of the
# pattern's capture groups; keeping them in a set drops entries that are
# identical in every field.
matches_all = set()
for fn in files:
    # The context manager closes the handle even if reading raises.
    with open(fn) as f:
        f_str = f.read()
    matches_f = pattern.findall(f_str)
    # Grow the set in place instead of building a new union on every pass.
    matches_all.update(matches_f)
matches_all

{('', 'EVIL', 'EVIL', 'Every Villian is Lemon'),
 ('', 'abba', 'ABBA', 'Björn & Benny, Agnetha & Frida'),
 ('', 'blt', 'BLT', 'bacon lettuce tomato'),
 ('', 'crud', 'CRUD', 'create, read, update, and delete'),
 ('', 'evil', 'EVIL', 'Every Villian is Lemon'),
 ('', 'fish', 'FISH', "F' It, Stuff Happens"),
 ('', 'gps', 'GPS', 'Go Pound Sound'),
 ('', 'hh', 'HH', 'hobbit head'),
 ('', 'irbh', 'IRBH', "I'd Rather Be Hiking!"),
 ('', 'mo', 'MO', 'modus operandi'),
 ('', 'odst', 'ODST', 'Orbital Drop Shock Trooper'),
 ('', 'otr', 'OTR', 'Optimal Test Ruminant'),
 ('', 'otr', 'OTR', 'over the rainbow'),
 ('', 'pre', 'PRE', 'Prototype Ruminant Evaluation'),
 ('', 'sc', 'SC', 'Snack Club'),
 ('', 'ul', 'UL', 'ultralight'),
 ('', 'unsc', 'UNSC', 'United Nations Space Command'),
 ('[longplural="Ruminants Under Test"]', 'rut', 'RUT', 'Ruminant Under Test')}


import pandas as pd
# One row per unique entry. The tuples are sorted before building the frame
# because a set has no defined iteration order: without this the row order
# (and the order of rows that tie in a later sort) can change between runs.
df = pd.DataFrame(sorted(matches_all),columns=['optional','acronym id','short','long'])
df


# A row is flagged when its id or its long form occurs more than once;
# keep=False marks every member of a duplicated group, not only the repeats.
repeated_id = df['acronym id'].duplicated(keep=False)
repeated_long = df['long'].duplicated(keep=False)
df.loc[repeated_id | repeated_long, 'duplicate flag'] = ' %%%%% DUPLICATE'
df


# Section letter: first character of the id, upper-cased.
first_char = df['acronym id'].str.get(0)
df['letter'] = first_char.str.upper()
letters = sorted(set(df['letter']))

# for alphabetizing
df = df.sort_values(by=['letter','acronym id', 'long'])

# Rebuild the LaTeX line of every row; rows without a duplicate flag get an
# empty suffix instead of NaN.
flag_suffix = df['duplicate flag'].fillna('')
df['entry'] = (
    '\\newabbreviation' + df['optional']
    + '{' + df['acronym id']
    + '}{' + df['short']
    + '}{' + df['long']
    + '}' + flag_suffix
)

# top of the file: a comment banner, built from one literal per output line
acronym_txt = (
    "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"
    "    % ACRONYMS\n"
    "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"
)

# function for creating file section by letter
def to_latex_str_abc(x):
    """Return the LaTeX text of one letter section.

    Parameters
    ----------
    x : pandas.DataFrame
        The rows of a single letter group. It must have an ``entry`` column
        holding the finished LaTeX lines. The section letter is taken from the
        first value of the ``letter`` column when that column is present,
        otherwise from ``x.name``.
        NOTE(review): the ``x.name`` fallback assumes the caller (for example
        ``GroupBy.apply``) attaches the group key as ``name`` - confirm for the
        pandas version in use.

    Returns
    -------
    str
        A comment banner naming the letter, followed by every entry of ``x``
        on its own tab-indented, newline-terminated line.
    """
    # Work only on the frame that was passed in rather than re-filtering a
    # module-level frame, so the function can be used on its own.
    if 'letter' in x.columns:
        letter = x['letter'].iloc[0]
    else:
        letter = x.name
    abc_comment = """
%========================================================================================
%    """ + letter + """
%========================================================================================
"""
    entry_txt = ''.join('\t' + entry + '\n' for entry in x['entry'])
    return abc_comment + entry_txt
# Build one section per letter by iterating over the groups (in sorted key
# order). Iteration hands each group over with its 'letter' column intact,
# whereas GroupBy.apply on the grouping column is deprecated since pandas 2.2.
df_by_l = [to_latex_str_abc(group) for _letter, group in df.groupby('letter')]
acronym_txt += ''.join(df_by_l)

# Review the result
print(acronym_txt)

# write content
with open('Acronyms.tex', 'w') as f:
    f.write(acronym_txt)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % ACRONYMS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%========================================================================================
%    A
%========================================================================================
	\newabbreviation{abba}{ABBA}{Björn & Benny, Agnetha & Frida}

%========================================================================================
%    B
%========================================================================================
	\newabbreviation{blt}{BLT}{bacon lettuce tomato}

%========================================================================================
%    C
%========================================================================================
	\newabbreviation{crud}{CRUD}{create, read, update, and delete}

%========================================================================================
%    E
%========================================================================================
	\newabbreviation{EVIL}{EVIL}{Every Villian is Lemon} %%%%% DUPLICATE
	\newabbreviation{evil}{EVIL}{Every Villian is Lemon} %%%%% DUPLICATE

%========================================================================================
%    F
%========================================================================================
	\newabbreviation{fish}{FISH}{F' It, Stuff Happens}

%========================================================================================
%    G
%========================================================================================
	\newabbreviation{gps}{GPS}{Go Pound Sound}

%========================================================================================
%    H
%========================================================================================
	\newabbreviation{hh}{HH}{hobbit head}

%========================================================================================
%    I
%========================================================================================
	\newabbreviation{irbh}{IRBH}{I'd Rather Be Hiking!}

%========================================================================================
%    M
%========================================================================================
	\newabbreviation{mo}{MO}{modus operandi}

%========================================================================================
%    O
%========================================================================================
	\newabbreviation{odst}{ODST}{Orbital Drop Shock Trooper}
	\newabbreviation{otr}{OTR}{Optimal Test Ruminant} %%%%% DUPLICATE
	\newabbreviation{otr}{OTR}{over the rainbow} %%%%% DUPLICATE

%========================================================================================
%    P
%========================================================================================
	\newabbreviation{pre}{PRE}{Prototype Ruminant Evaluation}

%========================================================================================
%    R
%========================================================================================
	\newabbreviation[longplural="Ruminants Under Test"]{rut}{RUT}{Ruminant Under Test}

%========================================================================================
%    S
%========================================================================================
	\newabbreviation{sc}{SC}{Snack Club}

%========================================================================================
%    U
%========================================================================================
	\newabbreviation{ul}{UL}{ultralight}
	\newabbreviation{unsc}{UNSC}{United Nations Space Command}

Organizing and Combining LaTeX Acronyms/Glossary Entries with Python (glossaries package)

Introduction

Set-up

Match Pattern

Run pattern against file content

Dataframize

Duplicate Handling

Create Organized Content, Write It

Conclusions

	optional	acronym id	short	long
0		evil	EVIL	Every Villian is Lemon
1		hh	HH	hobbit head
2		EVIL	EVIL	Every Villian is Lemon
3		mo	MO	modus operandi
4		pre	PRE	Prototype Ruminant Evaluation
5		ul	UL	ultralight
6		otr	OTR	over the rainbow
7	[longplural="Ruminants Under Test"]	rut	RUT	Ruminant Under Test
8		otr	OTR	Optimal Test Ruminant
9		crud	CRUD	create, read, update, and delete
10		irbh	IRBH	I'd Rather Be Hiking!
11		sc	SC	Snack Club
12		blt	BLT	bacon lettuce tomato
13		unsc	UNSC	United Nations Space Command
14		fish	FISH	F' It, Stuff Happens
15		abba	ABBA	Björn & Benny, Agnetha & Frida
16		gps	GPS	Go Pound Sound
17		odst	ODST	Orbital Drop Shock Trooper