Ephemeral-ing Twitter

So now ephemeral was working on my current tweets, but it was not handling the “historical” ones, which Twitter deems to be those older than the 3,200 most recent.

There are APIs that you can use to access those, but they require a paid license. So instead I went into my Twitter settings and requested an archive of my tweets. This takes a number of hours, but eventually you get a download link, and a while after using that you end up with a huge zip file.

In that archive you’ll find twitter-data/tweet.js, which is a JavaScript representation of your tweets. I process this with the first code snippet below.

The first line of tweet.js is a variable assignment, but once you get rid of that (the initial sed) the file can be processed with jq. I select the tweets older than a cutoff timestamp, strip out my whitelisted tweets, and write the IDs of the remaining ones to doomed.list.

#!/bin/bash
#
# Build doomed.list: the IDs of the archived tweets that are to be deleted.
#
# Pipeline stages:
#   1. sed  - overwrite the first line of tweet.js (a JavaScript variable
#             assignment) with "[{" so the remainder can be read by jq.
#   2. jq   - parse each tweet's created_at, keep only tweets created before
#             the Unix timestamp 1545339691, and print the id of each one,
#             one per line.
#   3. grep - drop the whitelisted IDs (123, 456 and 789 here). -x makes the
#             match whole-line, so a whitelisted ID can never exclude a
#             different, longer ID that merely contains it as a substring.

sed '1s/.*/[{/' twitter-data/tweet.js \
  | jq -r '.[] |
      select((.created_at|strptime("%a %b %d %H:%M:%S %z %Y")|mktime)
             < 1545339691 ) | .id' \
  | grep -v -x -E '123|456|789' \
  > doomed.list

I made this one-off based on ephemeral/main.go. I stripped out the lambda bits, since it runs on my laptop, and then hard-coded lots of the other bits. It’s a one-off, so that’s fine. It loops through doomed.list and removes all the remaining “historical” tweets.

Set the environment variables and then run go run main.go to have it do its thing.

It’s a weird feeling to remove it all, but it does change how you use it and reminds you that Twitter is a conversation, not an archive.

package main

import (
	"bufio"
	"net/url"
	"os"
	"strconv"
	"strings"
	"time"

	"log"

	"github.com/ChimeraCoder/anaconda"
)

// Twitter API credentials, read from the environment when the package is
// initialized. Each one goes through getenv, which panics on an unset or
// empty variable, so a missing credential stops the program before main runs.
var (
	consumerKey       = getenv("TWITTER_CONSUMER_KEY")
	consumerSecret    = getenv("TWITTER_CONSUMER_SECRET")
	accessToken       = getenv("TWITTER_ACCESS_TOKEN")
	accessTokenSecret = getenv("TWITTER_ACCESS_TOKEN_SECRET")
)

// getenv returns the value of the environment variable called name.
// It panics when that variable is unset or set to the empty string.
func getenv(name string) string {
	if value := os.Getenv(name); value != "" {
		return value
	}
	panic("missing required environment variable " + name)
}

// getTimeline requests the user timeline through api and returns the tweets
// it yields. On failure it returns an empty, non-nil slice together with the
// error reported by the API call.
func getTimeline(api *anaconda.TwitterApi) ([]anaconda.Tweet, error) {
	params := url.Values{
		// Twitter hands back only the 20 most recent tweets unless a count is given.
		"count": {"200"},
		// Retweets are left out once count is used, so ask for them explicitly.
		"include_rts": {"true"},
	}

	tweets, err := api.GetUserTimeline(params)
	if err == nil {
		return tweets, nil
	}
	return []anaconda.Tweet{}, err
}

// deleteFromTimeline deletes every tweet whose ID is listed in the file
// doomed.list (one decimal ID per line, opened relative to the working
// directory) and returns the number of tweets that were actually deleted.
//
// If doomed.list cannot be opened the process exits through log.Fatal.
// Problems with individual lines never abort the run; they are tallied:
//   - already:   the API error contained "No status found with that ID."
//   - suspended: the API error contained "User has been suspended."
//   - blocked:   the API error contained
//     "Sorry, you are not authorized to see this status."
//   - failed:    the line was not a valid int64, or any other API error.
//
// Blank lines are skipped without being counted. A summary of all tallies
// is logged before returning.
func deleteFromTimeline(api *anaconda.TwitterApi) int {
	var count, already, suspended, blocked, failed int

	doomed, err := os.Open("doomed.list")
	if err != nil {
		log.Fatal("could not get deletion list: ", err)
	}
	defer doomed.Close()

	scanner := bufio.NewScanner(doomed)
	for scanner.Scan() {
		// Tolerate stray whitespace, CRLF line endings and empty lines.
		line := strings.TrimSpace(scanner.Text())
		if line == "" {
			continue
		}

		id, err := strconv.ParseInt(line, 10, 64)
		if err != nil {
			failed++
			log.Print("failed to parse doomed: ", err)
			continue
		}

		// Throttle to at most two API calls per second. Sleeping only here
		// means unparseable lines do not cost any waiting time.
		time.Sleep(time.Second / 2)

		if _, err = api.DeleteTweet(id, true); err != nil {
			// The failure reason is identified by matching the error text.
			msg := err.Error()
			switch {
			case strings.Contains(msg, "No status found with that ID."):
				already++
				// These can be numerous, so only report every hundredth one.
				if already%100 == 0 {
					log.Print("ALREADY DELETED COUNT ", already)
				}
			case strings.Contains(msg, "User has been suspended."):
				suspended++
				log.Print("SUSPENDED ID ", id)
			case strings.Contains(msg, "Sorry, you are not authorized to see this status."):
				blocked++
				log.Print("BLOCKED ID ", id)
			default:
				failed++
				log.Print("failed to delete: ", err)
			}
			continue
		}

		log.Print("DELETED ID ", id)
		count++
	}
	// Scan returns false on a read error as well as at end of file; report
	// the former so a truncated run is not mistaken for a complete one.
	if err := scanner.Err(); err != nil {
		log.Print("stopped reading doomed.list early: ", err)
	}

	log.Print("no more tweets to delete")
	log.Printf("Totals: %d deleted, %d already deleted, %d suspended, %d blocked, %d failed",
		count, already, suspended, blocked, failed)

	return count
}

// main hands the consumer credentials to the anaconda package, creates an
// API client from the access token pair, enables anaconda's basic logger and
// runs the deletion pass over doomed.list. The process exits with status 1
// when that pass deleted nothing.
func main() {
	anaconda.SetConsumerKey(consumerKey)
	anaconda.SetConsumerSecret(consumerSecret)

	client := anaconda.NewTwitterApi(accessToken, accessTokenSecret)
	client.SetLogger(anaconda.BasicLogger)

	if deleted := deleteFromTimeline(client); deleted == 0 {
		os.Exit(1)
	}
}