python - 基于Python中的字符串列表进行迭代

Question

我有一个文本文件，里面保存了一组 YouTube videoID。下面的代码本意是循环遍历这些 ID，并获取每个视频的评论源。我的循环肯定有错误，但我自己找不到，谁能帮我指出来？

# Set the videoID list
# Reads video_ids.txt in full and keeps one list item per line of the file.
f = open('video_ids.txt', 'r')
videoID_list = f.read().splitlines()
f.close()

# Cycle through videoID list getting comments via the YouTube API
# NOTE(review): this loop header is followed only by a comment and a `def`
# at column 0, so as written the loop has no indented body.
# NOTE(review): `yt_service`, `re` and `entry` are used below but are not
# imported or assigned anywhere in this snippet.
for video_id in videoID_list:
#Define the comments generator
def comments_generator(yt_service, video_id):
    # Request the first comment feed for the video, yield each of its entries,
    # then keep following the feed's "next" link until there is none.
    comment_feed = yt_service.GetYouTubeVideoCommentFeed(video_id=video_id)
    while comment_feed is not None:
        for comment in comment_feed.entry:
            yield comment
        next_link = comment_feed.GetNextLink()
        if next_link is None:
            # No further page: clearing the feed ends the while loop.
            comment_feed = None
        else:
            comment_feed = yt_service.GetYouTubeVideoCommentFeed(next_link.href)

        # NOTE(review): this loop is indented to the level of the `while` body
        # above, so as written it is part of comments_generator (and calls it
        # again from inside itself) rather than part of the `for video_id` loop.
        for comment in comments_generator(yt_service, video_id):

            # About the video
            video_title = entry.media.title.text
            video_date = entry.published.text

            # About comments
            author_name = comment.author[0].name.text
            raw_text = comment.content.text 
            comment_date = comment.published.text

            # Keep only alphanumeric characters and spaces in the comment text
            text = re.sub(r'\W+', ' ', raw_text)

            # Write to a file ('a' means append) - Comment text is set to lowercase [.lower()]
            # NOTE(review): the file is re-opened for every comment and is never
            # closed in this snippet.
            f = open('video_comments.tsv', 'a')
            f.write("{}\t{}\t{}\t{}\t{}\t{}\t\r".format(video_title, video_date[:10], comment_date[:10], comment_date[11:19], author_name, text.lower()))

            # Also print results on screen - Comment text is set to lowercase [.lower()]
    # NOTE(review): this print is indented less than the comment loop above, so
    # it does not run once per comment.
    print("{}\t{}\t{}\t{}\t{}\t{}\t\r".format(video_title, video_date[:10], comment_date[:10], comment_date[11:19], author_name, text.lower()))

score 0 · Accepted Answer

修复代码中的一些错误后：

import gdata.youtube
import gdata.youtube.service
import re

# Service object through which the YouTube API requests below are made.
yt_service = gdata.youtube.service.YouTubeService()

# Set the videoID list: one list item per line of video_ids.txt.
# The `with` block closes the file even if reading it raises.
with open('video_ids.txt', 'r') as id_file:
    videoID_list = id_file.read().splitlines()

#Define the comments generator
def comments_generator(yt_service, video_id):
    """Yield every comment entry of a video, one feed page after another.

    The first page is requested by ``video_id``; each following page is
    requested through the ``href`` of the current page's next link.

    Args:
        yt_service: Object providing ``GetYouTubeVideoCommentFeed``.
        video_id: ID of the video whose comments are wanted.

    Yields:
        Each item of every page's ``entry`` sequence, in feed order.
    """
    page = yt_service.GetYouTubeVideoCommentFeed(video_id=video_id)
    while page is not None:
        for item in page.entry:
            yield item
        following = page.GetNextLink()
        if following is None:
            # Last page reached: nothing more to fetch.
            return
        page = yt_service.GetYouTubeVideoCommentFeed(following.href)

# Append one tab-separated row per comment to video_comments.tsv ('a' means
# append) and echo the same row on screen. The `with` block closes the file
# even if an API call or a write raises part-way through.
with open('video_comments.tsv', 'a') as out_file:

    # Cycle through videoID list getting comments via the YouTube API
    for video_id in videoID_list:

        # About the video. The entry depends only on video_id, so it is
        # requested once per video rather than once per comment.
        video_entry = yt_service.GetYouTubeVideoEntry(video_id=video_id)
        video_title = video_entry.title.text
        video_date = video_entry.published.text

        for comment in comments_generator(yt_service, video_id):

            # About comments
            author_name = comment.author[0].name.text
            raw_text = comment.content.text
            comment_date = comment.published.text

            # Replace every run of non-word characters in the comment text
            # with a single space.
            text = re.sub(r'\W+', ' ', raw_text)

            # Build the row once so the file and the screen get identical
            # output. Comment text is set to lowercase [.lower()].
            # NOTE(review): the [:10] and [11:19] slices presumably pick the
            # date and time parts of an ISO-style timestamp - confirm.
            row = "{}\t{}\t{}\t{}\t{}\t{}\t\r".format(
                video_title,
                video_date[:10],
                comment_date[:10],
                comment_date[11:19],
                author_name,
                text.lower(),
            )

            out_file.write(row)

            # Also print results on screen, once per comment. Keeping this
            # inside the loop avoids a NameError when no comment was fetched.
            print(row)

python - 基于Python中的字符串列表进行迭代

1 回答 1

Related

Reference