# Split file names of the form "<prefix>_<number>.<ext>" using plain string
# methods and print, for each one, the prefix, the last two characters of the
# number part, and the extension.
strings = [
    "abc_34.txt",
    "abc_2034.txt",
]
for string in strings:
    # Split on the first period only, so a multi-part extension such as
    # "tar.gz" stays intact instead of breaking the two-name unpacking.
    first_part, ext = string.split(".", 1)
    # Unpacking raises ValueError unless the name part holds exactly one
    # underscore; that is deliberate, malformed names are not silently accepted.
    prefix, number = first_part.split("_")
    # A single %-formatted argument prints the same space-separated line under
    # both Python 2 and Python 3 (the bare `print a, b` form is Python 2 only).
    print("%s %s %s" % (prefix, number[-2:], ext))
--output:--
abc 34 txt
abc 34 txt
import re
# Extract the prefix, the last two digits of the number, and the extension from
# each file name using a verbose-mode regular expression, then print them.
strings = [
    "abc_34.txt",
    "abc_2034.txt",
]
pattern = r"""
([^_]*) #Match not an underscore, 0 or more times, captured in group 1
_ #followed by an underscore
\d* #followed by a digit, 0 or more times, greedy
(\d{2}) #followed by a digit, twice, captured in group 2
[.] #followed by a period
(.*) #followed by any character, 0 or more times, captured in group 3
"""
# re.X makes the engine ignore the whitespace and the #-comments in the pattern.
regex = re.compile(pattern, flags=re.X)
for string in strings:
    # Call match() on the compiled pattern directly; it anchors at the start
    # of the string, so the prefix must begin at the first character.
    md = regex.match(string)
    # Names that do not fit the pattern are skipped without any output.
    if md:
        # groups() yields exactly the three captures, in order. The single
        # %-formatted argument prints identically under Python 2 and Python 3.
        print("%s %s %s" % md.groups())
--output:--
abc 34 txt
abc 34 txt