-
Notifications
You must be signed in to change notification settings - Fork 0
/
gethighlights.py
executable file
·114 lines (99 loc) · 3.75 KB
/
gethighlights.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env python3
import csv
import json
import os
import sys

import requests
# URL of the Google Sheets CSV export
sheet_url = 'https://docs.google.com/spreadsheets/d/1n28Iqsj9nZL-ku6HOPJPSa6KUEpQ6xO00McQ96f2dww/export?exportFormat=csv'

# Attempt to download the CSV file.
# requests waits indefinitely unless a timeout is given, so one is set here.
# Connection-level failures (DNS, refused connection, timeout) are reported
# the same way as a non-200 response instead of surfacing as a traceback.
try:
    response = requests.get(sheet_url, timeout=30)
except requests.RequestException as err:
    print(f"Failed to download the CSV file: {err}", file=sys.stderr)
    sys.exit(1)
if response.status_code != 200:
    print("Failed to download the CSV file", file=sys.stderr)
    sys.exit(1)

# Decode the CSV data and expose each row as a dict keyed by the header row.
# NOTE(review): splitlines() strips line terminators, so a quoted cell that
# spans several lines is re-joined by the csv reader without its line breaks.
# Kept as-is because the markdown writer emits each highlight on one line.
csv_data = response.content.decode('utf-8')
csv_reader = csv.DictReader(csv_data.splitlines())

# Directories for storing data
data_dir = '_data/books'
collection_dir = '_books'
os.makedirs(data_dir, exist_ok=True)
os.makedirs(collection_dir, exist_ok=True)

# Load cached book data; a missing cache file simply means an empty cache.
cache_file = 'books_cache.json'
try:
    with open(cache_file, 'r', encoding='utf-8') as file:
        cache = json.load(file)
except FileNotFoundError:
    cache = {}

# Dictionary to store book information, keyed by ISBN
books = {}

# Dictionary to store non-ISBN sources and their highlights
non_isbn_sources = {}
# Function to check if a string is a number
def is_number(s):
    """Return True when ``float(s)`` succeeds, False otherwise.

    Anything ``float()`` accepts counts as a number, including forms such as
    ``"1e5"`` or ``"nan"``.  Values that ``float()`` rejects with a
    ``TypeError`` (for example ``None``, which ``csv.DictReader`` produces for
    cells missing from a short row) are reported as "not a number" rather
    than raising.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        return False
    return True
# Process each row in the CSV, grouping highlights by ISBN (books) or by the
# free-text source name found in the "isbn" column (everything else).
for row in csv_reader:
    # DictReader fills cells missing from a short row with None; treat those
    # as empty strings so the string operations below cannot fail.
    isbn = row['isbn'] or ''
    highlight = row['highlight'] or ''

    if is_number(isbn):
        # Handle rows with ISBN
        if isbn not in books:
            cached = cache.get(isbn)
            if cached is None:
                # Placeholder metadata for books absent from the cache.
                # "pageCount" is included because the markdown writer reads it.
                book_info = {
                    "title": "Unknown Title",
                    "authors": ["Unknown Author"],
                    "publisher": "Unknown Publisher",
                    "publishedDate": "Unknown Date",
                    "pageCount": "",
                    "coverImage": "",
                }
            else:
                # Copy so that attaching highlights does not mutate the cache.
                book_info = dict(cached)
            book_info['highlights'] = []
            books[isbn] = book_info
        books[isbn]['highlights'].append(highlight)
    else:
        # Handle non-ISBN sources; a blank cell has no usable source name.
        source = isbn if isbn.strip() else "Unknown Source"
        non_isbn_sources.setdefault(source, []).append(highlight)
# Write information for each book to separate markdown files
def _yaml_quote(value):
    """Return *value* rendered as a double-quoted YAML scalar.

    A JSON string literal is used for the quoting: it escapes double quotes,
    backslashes and control characters (including line breaks), so arbitrary
    titles and highlights cannot terminate the scalar early or break the
    front matter.  Non-ASCII text is written as-is.
    """
    return json.dumps(str(value), ensure_ascii=False)


for isbn, book in books.items():
    # Fall back to a placeholder when the record has no (or an empty) author
    # list, so indexing the first author cannot fail.
    authors = book.get('authors') or ["Unknown Author"]
    first_author = authors[0].split()
    first_author_last_name = first_author[-1] if first_author else "Unknown"
    print(book)

    # Assemble the whole front matter first, then write it in one call.
    front_matter = [
        '---\n',
        'layout: post\n',
        f"title: {_yaml_quote(book['title'])}\n",
        f"authors: {_yaml_quote(', '.join(authors))}\n",
        f"first-author-last-name: {_yaml_quote(first_author_last_name)}\n",
        f"publisher: {_yaml_quote(book['publisher'])}\n",
        f"publishedDate: {_yaml_quote(book['publishedDate'])}\n",
        # Placeholder (uncached) books carry no page count; emit an empty
        # value instead of raising KeyError.
        f"page_number: {_yaml_quote(book.get('pageCount', ''))}\n",
        f"coverImage: {_yaml_quote(book.get('coverImage', ''))}\n",
    ]

    # Skip blank highlights; omit the key entirely when none remain.
    non_empty_highlights = [h for h in book['highlights'] if h and h.strip()]
    if non_empty_highlights:
        front_matter.append('highlights:\n')
        front_matter.extend(
            f" - {_yaml_quote(highlight)}\n" for highlight in non_empty_highlights
        )
    front_matter.append('---\n\n')

    with open(os.path.join(collection_dir, f"{isbn}.md"), 'w', encoding='utf-8') as file:
        file.writelines(front_matter)
# if non_isbn_sources:
# with open(os.path.join("other.md"), 'w', encoding='utf-8') as file:
# file.write('---\n')
# file.write('layout: other\n')
# file.write('title: "Other Quotes"\n')
# file.write('highlights:\n')
#
# for source, highlights in non_isbn_sources.items():
# file.write(f" - source: \"{source}\"\n")
# file.write(" quotes:\n")
# for highlight in highlights:
# highlight = highlight.replace('"', '\\"')
# file.write(f" - \"{highlight}\"\n")
#
# file.write('---\n\n')