0

The code is supposed to pull results from the API, split them by day, and store each day's results as a distinct JSON file. Why doesn't it create the JSON files?

twint_loop splits the date range into a series of days and calls twint_search to do the searching for each date. Each json is named after the date and stored in a directory based on the search term, using clean_name to ensure that it is a valid directory name.

    from datetime import timedelta
    from string import ascii_letters, digits
    from os import mkdir, path
    import pandas as pd
    import twint
    
    
    
    def clean_name(dirname):
        """Return *dirname* with everything but ASCII letters and digits removed.

        Characters are kept in their original order; anything else
        (punctuation, whitespace, non-ASCII characters) is dropped.
        """
        allowed = frozenset(ascii_letters + digits)
        kept = [ch for ch in dirname if ch in allowed]
        return ''.join(kept)
    
    
    def twint_search(searchterm, since, until, json_name):
        '''
        Run a Twint search for a specific date range and store results to json.

        Parameters:
            searchterm: query assigned to the Twint config's ``Search``.
            since: start of the range, assigned to ``Since``.
            until: end of the range, assigned to ``Until``.
            json_name: output path, assigned to ``Output``.

        The search is best effort: if it fails, the error is printed and the
        function returns normally so a caller can carry on with other ranges.
        KeyboardInterrupt and SystemExit are not caught and propagate.
        '''
        c = twint.Config()
        c.Search = searchterm
        c.Since = since
        c.Until = until
        c.Hide_output = True
        c.Store_json = True
        c.Output = json_name
        c.Debug = True

        try:
            twint.run.Search(c)
        except Exception as exc:
            # ``Exception`` excludes KeyboardInterrupt/SystemExit, so those
            # still propagate. Report what went wrong instead of hiding it:
            # a swallowed error here is why a missing json file is otherwise
            # impossible to diagnose.
            print("Problem with %s." % since)
            print("  %s: %s" % (type(exc).__name__, exc))
    
    
    def twint_loop(searchterm, since, until):
        """Search one day at a time over the range from *since* to *until*.

        A directory named ``clean_name(searchterm)`` is created (or reused if
        it already exists). For every date produced by
        ``pd.date_range(since, until)``, ``twint_search`` is called with that
        day as the start, the following day as the end, and
        ``<directory>/<YYYY-MM-DD>.json`` as the output path.
        """
        dirname = clean_name(searchterm)
        try:
            # Create target directory
            mkdir(dirname)
        except FileExistsError:
            print("Directory", dirname, "already exists")
        else:
            print("Directory", dirname, "Created ")

        one_day = timedelta(days=1)

        for day in pd.date_range(since, until):
            # Each search window spans exactly one day: [day, day + 1).
            day_start = day.strftime("%Y-%m-%d")
            day_end = (day + one_day).strftime("%Y-%m-%d")

            json_name = path.join(dirname, '%s.json' % day_start)

            print('Getting %s ' % day_start)
            twint_search(searchterm, day_start, day_end, json_name)
    
    
    # Run the per-day search, then load every resulting json into one DataFrame.
    search_term = '#Microbiology'  # my keywords
    twint_loop(search_term, '06-01-2021', '07-01-2021')  # time range

    from glob import glob
    # twint_loop writes into the directory named clean_name(search_term), so
    # read from that same directory rather than a hard-coded, unrelated one.
    results_dir = clean_name(search_term)
    file_names = sorted(glob(path.join(results_dir, '*.json')))
    if not file_names:
        # pd.concat raises an opaque ValueError on an empty list; fail with
        # a message that says what is actually missing.
        raise SystemExit("No json files found in directory %r" % results_dir)
    dfs = [pd.read_json(fn, lines=True) for fn in file_names]
    mic_df = pd.concat(dfs)

    mic_df.info()
4

0 回答 0