2

我想制作一个关于文本中字符位置的表格。例子:

Text = "an apple and a banana"

字符:位置(从 0 到 20),以及该字符在每个位置上出现的次数

字符--> A:4101010...0-B:100000...0-C:000000...0-D:001000...0-E:000010...0-...-Z: 000000...0

我的代码哪里出了问题?

# Letters tracked by the table (lowercase Turkish alphabet); one table row each.
alphabet = ["a", "b", "c", "ç", "d", "e", "f", "g", "ğ", "h", "ı", "i", "j", "k", "l", "m", "n", "o", "ö", "p", "r", "s", "ş", "t", "u", "ü", "v", "y", "z"]
text = ["sample", "text"]

# Number of in-word positions (columns) kept for every letter.
max_positions = 20

# Letter -> row index, built once so the scan needs no per-character search.
row_of = {letter: row for row, letter in enumerate(alphabet)}

# position_list[row][pos] counts the words that have alphabet[row] at index pos.
position_list = [[0] * max_positions for _ in alphabet]

for word in text:
    # Visit every character exactly once. Scanning with word.find(char, start)
    # while advancing start by only one re-finds the same occurrence on each
    # pass and counts it several times.
    # Slicing keeps positions inside the table instead of raising IndexError.
    for position, char in enumerate(word[:max_positions]):
        row = row_of.get(char)
        # Characters outside the alphabet (e.g. "x") have no row; skip them.
        if row is not None:
            position_list[row][position] += 1

print(position_list)
4

3 个回答

0

我认为这是解决方案

import string


def count_letter_positions(text, letters):
    """Count how often each letter occurs at each in-word position.

    Args:
        text: Whitespace-separated words to scan.
        letters: Sequence of distinct single characters to track; row ``r``
            of the result belongs to ``letters[r]``.

    Returns:
        A list with one row per entry of ``letters``. Every row has as many
        columns as the longest word in ``text``; column ``p`` holds the
        number of words that have that letter at index ``p``. Characters
        that are not in ``letters`` are ignored.
    """
    words = text.split()
    # default=0 keeps empty or whitespace-only text from raising ValueError.
    width = max(map(len, words), default=0)
    # Build the letter -> row lookup once instead of searching per character.
    row_of = {letter: row for row, letter in enumerate(letters)}
    table = [[0] * width for _ in letters]
    for word in words:
        for pos, letter in enumerate(word):
            row = row_of.get(letter)
            # Digits, punctuation and other untracked characters have no row.
            if row is not None:
                table[row][pos] += 1
    return table


text = "an apple and a banana"
CharList = string.ascii_lowercase + string.ascii_uppercase
long_word = max(map(len, text.split()), default=0)
final_result = count_letter_positions(text, CharList)

for letter, li in zip(CharList, final_result):
    print(f'{letter}:{li}')
于 2019-12-28T14:05:17.183 回答
0

这解决了你的问题:

# Letters tracked by the table (lowercase Turkish alphabet); one table row each.
alphabet = ["a", "b", "c", "ç", "d", "e", "f", "g", "ğ", "h", "ı", "i", "j", "k", "l",
            "m", "n", "o", "ö", "p", "r", "s", "ş", "t", "u", "ü", "v", "y", "z"]


def build_position_table(text, letters, width=20):
    """Count how often each letter occurs at each in-word position.

    Args:
        text: Whitespace-separated words to scan.
        letters: Sequence of distinct single characters to track; row ``r``
            of the result belongs to ``letters[r]``.
        width: Number of positions (columns) kept per letter.

    Returns:
        A list of ``len(letters)`` rows with ``width`` counters each.
        Characters that are not in ``letters`` and positions at or beyond
        ``width`` are ignored.
    """
    # Build the letter -> row lookup once instead of searching per character.
    row_of = {letter: row for row, letter in enumerate(letters)}
    table = [[0] * width for _ in letters]
    for word in text.split():
        # Slicing keeps long words inside the table instead of raising IndexError.
        for i, c in enumerate(word[:width]):
            row = row_of.get(c)
            # Untracked characters (e.g. "x") have no row; skip them.
            if row is not None:
                table[row][i] += 1
    return table


text = "an apple and a banana"

position_list = build_position_table(text, alphabet)
于 2019-12-28T13:44:59.290 回答
0

希望我正确理解了你的问题。这个脚本会根据你的字母表创建一个字典,其中每个值是一个列表,记录该字符在文本各单词的每个位置上出现的次数:

from pprint import pprint
from itertools import zip_longest


def position_counts(text, letters):
    """Map each letter to its per-position occurrence counts.

    Args:
        text: Whitespace-separated words to scan.
        letters: Iterable of single characters to track.

    Returns:
        A dict with one key per entry of ``letters``. Each value is a list
        as long as the longest word in ``text``; element ``i`` is the number
        of words that have that letter at index ``i``. Characters that are
        not in ``letters`` are ignored.
    """
    words = text.split()
    # default=0 keeps empty or whitespace-only text from raising ValueError.
    width = max(map(len, words), default=0)
    counts = {letter: [0] * width for letter in letters}
    # zip_longest transposes the words: column i holds the i-th character of
    # every word, padded with None for words that are too short.
    for i, column in enumerate(zip_longest(*words)):
        for char in column:
            # Skips both the None padding and untracked characters (e.g. "x").
            if char in counts:
                counts[char][i] += 1
    return counts


Text = "an apple and a banana"
alphabet = ["a", "b", "c", "ç", "d", "e", "f", "g", "ğ", "h", "ı", "i", "j", "k", "l", "m", "n", "o", "ö", "p", "r", "s", "ş", "t", "u", "ü", "v", "y", "z"]
word_max_len = max(map(len, Text.split()), default=0)
position_dict = position_counts(Text, alphabet)

pprint(position_dict)

输出:

{'a': [4, 1, 0, 1, 0, 1],
 'b': [1, 0, 0, 0, 0, 0],
 'c': [0, 0, 0, 0, 0, 0],
 'd': [0, 0, 1, 0, 0, 0],
 'e': [0, 0, 0, 0, 1, 0],
 'f': [0, 0, 0, 0, 0, 0],
 'g': [0, 0, 0, 0, 0, 0],
 'h': [0, 0, 0, 0, 0, 0],
 'i': [0, 0, 0, 0, 0, 0],
 'j': [0, 0, 0, 0, 0, 0],
 'k': [0, 0, 0, 0, 0, 0],
 'l': [0, 0, 0, 1, 0, 0],
 'm': [0, 0, 0, 0, 0, 0],
 'n': [0, 2, 1, 0, 1, 0],
 'o': [0, 0, 0, 0, 0, 0],
 'p': [0, 1, 1, 0, 0, 0],
 'r': [0, 0, 0, 0, 0, 0],
 's': [0, 0, 0, 0, 0, 0],
 't': [0, 0, 0, 0, 0, 0],
 'u': [0, 0, 0, 0, 0, 0],
 'v': [0, 0, 0, 0, 0, 0],
 'y': [0, 0, 0, 0, 0, 0],
 'z': [0, 0, 0, 0, 0, 0],
 'ç': [0, 0, 0, 0, 0, 0],
 'ö': [0, 0, 0, 0, 0, 0],
 'ü': [0, 0, 0, 0, 0, 0],
 'ğ': [0, 0, 0, 0, 0, 0],
 'ı': [0, 0, 0, 0, 0, 0],
 'ş': [0, 0, 0, 0, 0, 0]}
于 2019-12-28T13:45:06.373 回答