専門ユニット2/山内研セミナー(2020/10/20)

関連サイトと資料

フォルダへのPython仮想環境の適用

先週お話しした、「.vscode」フォルダの下に設定ファイル「settings.json」を配置する方法は、あまり好ましくないようです(警告のダイアログが出ました)。そこで、以下の方法をお勧めします。

  1. 「.vscode」フォルダを削除したうえで、仮想環境を適用したいフォルダ(「.vscode」を置いていたフォルダ)をVSCodeのメニュー「ファイル」→「フォルダを開く」で開きます。
  2. コントロールキーとシフトキーと「p」キーを同時に押して、コマンドパレットを表示します。そして、上部のテキストボックスに「python: select i」と入力します。

  3. 表示された「Python: Select Interpreter」をクリックします。
  4. 表示された仮想環境の中から、適用したい環境をクリックします。

1.3.1 キーコネクタを探せ

test2.py(平均接続数の算出)
# Members of the network: every record pairs a numeric id with a name.
users = [
    {"id": user_id, "name": name}
    for user_id, name in enumerate([
        "Hero", "Dunn", "Sue", "Chi", "Thor",
        "Clive", "Hicks", "Devin", "Kate", "Klein",
    ])
]

# Undirected friendship edges, written as (id, id) pairs.
friendship_pairs = [
    (0, 1), (0, 2), (1, 2), (1, 3), (2, 3), (3, 4),
    (4, 5), (5, 6), (5, 7), (6, 8), (7, 8), (8, 9),
]

# Adjacency list keyed by user id; every user starts with no friends ...
friendships = {}
for member in users:
    friendships[member["id"]] = []

# ... then each pair is recorded in both directions.
for left_id, right_id in friendship_pairs:
    friendships[left_id].append(right_id)
    friendships[right_id].append(left_id)


def number_of_friends(user):
    """Return how many friends ``user`` has according to ``friendships``."""
    return len(friendships[user["id"]])


# Each friendship is counted once per endpoint, i.e. twice in total.
total_connections = sum(map(number_of_friends, users))
num_users = len(users)
avg_connections = total_connections / num_users
print('平均接続数:{0}'.format(avg_connections))
    

test3.py(最も友達の多いユーザを探索)
# Members of the network: every record pairs a numeric id with a name.
users = [
    {"id": user_id, "name": name}
    for user_id, name in enumerate([
        "Hero", "Dunn", "Sue", "Chi", "Thor",
        "Clive", "Hicks", "Devin", "Kate", "Klein",
    ])
]

# Undirected friendship edges, written as (id, id) pairs.
friendship_pairs = [
    (0, 1), (0, 2), (1, 2), (1, 3), (2, 3), (3, 4),
    (4, 5), (5, 6), (5, 7), (6, 8), (7, 8), (8, 9),
]

# Adjacency list keyed by user id; every user starts with no friends ...
friendships = {}
for member in users:
    friendships[member["id"]] = []

# ... then each pair is recorded in both directions.
for left_id, right_id in friendship_pairs:
    friendships[left_id].append(right_id)
    friendships[right_id].append(left_id)


def number_of_friends(user):
    """Return how many friends ``user`` has according to ``friendships``."""
    return len(friendships[user["id"]])


# (user id, friend count) pairs, most-connected users first.  The sort is
# stable, so users with equal counts keep their original id order.
num_friends_by_id = sorted(
    ((member["id"], number_of_friends(member)) for member in users),
    key=lambda id_and_count: id_and_count[1],
    reverse=True,
)

print(num_friends_by_id)
    

1.3.2 知り合いかも?

test4.py(友達の友達を探索/失敗例)
# Members of the network: every record pairs a numeric id with a name.
users = [
    {"id": user_id, "name": name}
    for user_id, name in enumerate([
        "Hero", "Dunn", "Sue", "Chi", "Thor",
        "Clive", "Hicks", "Devin", "Kate", "Klein",
    ])
]

# Undirected friendship edges, written as (id, id) pairs.
friendship_pairs = [
    (0, 1), (0, 2), (1, 2), (1, 3), (2, 3), (3, 4),
    (4, 5), (5, 6), (5, 7), (6, 8), (7, 8), (8, 9),
]

# Adjacency list keyed by user id; every user starts with no friends ...
friendships = {}
for member in users:
    friendships[member["id"]] = []

# ... then each pair is recorded in both directions.
for left_id, right_id in friendship_pairs:
    friendships[left_id].append(right_id)
    friendships[right_id].append(left_id)


def number_of_friends(user):
    """Return how many friends ``user`` has according to ``friendships``."""
    return len(friendships[user["id"]])


def foaf_ids_bad(user):
    """Return the ids of the friends of ``user``'s friends, unfiltered.

    "foaf" is short for "friend of a friend".  This is the deliberately
    naive version: nothing is filtered out, so the result contains the
    user themself, the user's direct friends, and repeated ids.
    """
    collected = []
    for friend_id in friendships[user["id"]]:
        collected.extend(friendships[friend_id])
    return collected


foaf0 = foaf_ids_bad(users[0])
print(foaf0)
    

test5.py(共通の友達の数を探索)
from collections import Counter
   
# Members of the network: every record pairs a numeric id with a name.
users = [
    {"id": user_id, "name": name}
    for user_id, name in enumerate([
        "Hero", "Dunn", "Sue", "Chi", "Thor",
        "Clive", "Hicks", "Devin", "Kate", "Klein",
    ])
]

# Undirected friendship edges, written as (id, id) pairs.
friendship_pairs = [
    (0, 1), (0, 2), (1, 2), (1, 3), (2, 3), (3, 4),
    (4, 5), (5, 6), (5, 7), (6, 8), (7, 8), (8, 9),
]

# Adjacency list keyed by user id; every user starts with no friends ...
friendships = {}
for member in users:
    friendships[member["id"]] = []

# ... then each pair is recorded in both directions.
for left_id, right_id in friendship_pairs:
    friendships[left_id].append(right_id)
    friendships[right_id].append(left_id)


def number_of_friends(user):
    """Return how many friends ``user`` has according to ``friendships``."""
    return len(friendships[user["id"]])


def friends_of_friends(user):
    """Count mutual friends between ``user`` and each friend-of-a-friend.

    Returns a ``Counter`` mapping a candidate's id to the number of friends
    that candidate shares with ``user``.  The user themself and the user's
    direct friends are left out.
    """
    user_id = user["id"]
    direct_friends = friendships[user_id]
    mutual_counts = Counter()
    for friend_id in direct_friends:
        for candidate_id in friendships[friend_id]:
            # Skip the user and anyone who is already a direct friend.
            if candidate_id == user_id or candidate_id in direct_friends:
                continue
            mutual_counts[candidate_id] += 1
    return mutual_counts


print(friends_of_friends(users[3]))
    

各ユーザが持っている興味
# (user id, interest) pairs.  Interest strings are matched verbatim by the
# listings that use this table, so the spelling has to be consistent.
# Corrected spellings: "machine learning" (user 4), "Haskell" and
# "programming languages" (user 5).
interests = [
    (0, "Hadoop"), (0, "Big Data"), (0, "HBase"), (0, "Java"),
    (0, "Spark"), (0, "Storm"), (0, "Cassandra"),
    (1, "NoSQL"), (1, "MongoDB"), (1, "Cassandra"), (1, "HBase"),
    (1, "Postgres"),
    (2, "Python"), (2, "scikit-learn"), (2, "scipy"),
    (2, "numpy"), (2, "statsmodels"), (2, "pandas"),
    (3, "R"), (3, "Python"),
    (3, "statistics"), (3, "regression"), (3, "probability"),
    (4, "machine learning"), (4, "regression"), (4, "decision trees"),
    (4, "libsvm"),
    (5, "Python"), (5, "R"), (5, "Java"), (5, "C++"),
    (5, "Haskell"), (5, "programming languages"),
    (6, "statistics"), (6, "probability"), (6, "mathematics"), (6, "theory"),
    (7, "machine learning"), (7, "scikit-learn"), (7, "Mahout"),
    (7, "neural networks"),
    (8, "neural networks"), (8, "deep learning"),
    (8, "Big Data"), (8, "artificial intelligence"),
    (9, "Hadoop"), (9, "Java"), (9, "MapReduce"), (9, "Big Data"),
]
    

test6.py(指定した分野に興味を持つユーザの探索)
# Members of the network: every record pairs a numeric id with a name.
users = [
    {"id": user_id, "name": name}
    for user_id, name in enumerate([
        "Hero", "Dunn", "Sue", "Chi", "Thor",
        "Clive", "Hicks", "Devin", "Kate", "Klein",
    ])
]

# Undirected friendship edges, written as (id, id) pairs.
friendship_pairs = [
    (0, 1), (0, 2), (1, 2), (1, 3), (2, 3), (3, 4),
    (4, 5), (5, 6), (5, 7), (6, 8), (7, 8), (8, 9),
]

# (user id, interest) pairs.  data_scientists_who_like() compares interest
# strings verbatim, so the spelling has to be consistent.  Corrected
# spellings: "machine learning" (user 4), "Haskell" and
# "programming languages" (user 5).
interests = [
    (0, "Hadoop"), (0, "Big Data"), (0, "HBase"), (0, "Java"),
    (0, "Spark"), (0, "Storm"), (0, "Cassandra"),
    (1, "NoSQL"), (1, "MongoDB"), (1, "Cassandra"), (1, "HBase"),
    (1, "Postgres"),
    (2, "Python"), (2, "scikit-learn"), (2, "scipy"),
    (2, "numpy"), (2, "statsmodels"), (2, "pandas"),
    (3, "R"), (3, "Python"),
    (3, "statistics"), (3, "regression"), (3, "probability"),
    (4, "machine learning"), (4, "regression"), (4, "decision trees"),
    (4, "libsvm"),
    (5, "Python"), (5, "R"), (5, "Java"), (5, "C++"),
    (5, "Haskell"), (5, "programming languages"),
    (6, "statistics"), (6, "probability"), (6, "mathematics"), (6, "theory"),
    (7, "machine learning"), (7, "scikit-learn"), (7, "Mahout"),
    (7, "neural networks"),
    (8, "neural networks"), (8, "deep learning"),
    (8, "Big Data"), (8, "artificial intelligence"),
    (9, "Hadoop"), (9, "Java"), (9, "MapReduce"), (9, "Big Data"),
]

# Adjacency list keyed by user id; every user starts with an empty list.
friendships = {user["id"]: [] for user in users}

# Record each friendship pair in both directions.
for i, j in friendship_pairs:
    friendships[i].append(j)
    friendships[j].append(i)


def number_of_friends(user):
    """Return how many friends ``user`` has according to ``friendships``."""
    return len(friendships[user["id"]])


def data_scientists_who_like(target_interest):
    """Return the ids of the users whose interests include ``target_interest``.

    The match is an exact, case-sensitive string comparison, and the ids
    come back in the order they appear in ``interests``.  Every call scans
    the whole ``interests`` list.
    """
    return [
        user_id
        for user_id, user_interest in interests
        if user_interest == target_interest
    ]


print(data_scientists_who_like("Java"))
    

test7.py(興味ごとのユーザリストおよびその逆リストの作成,共通する興味が最も多いユーザの探索)
from collections import defaultdict
from collections import Counter
  
# Members of the network: every record pairs a numeric id with a name.
users = [
    {"id": user_id, "name": name}
    for user_id, name in enumerate([
        "Hero", "Dunn", "Sue", "Chi", "Thor",
        "Clive", "Hicks", "Devin", "Kate", "Klein",
    ])
]

# Undirected friendship edges, written as (id, id) pairs.
friendship_pairs = [
    (0, 1), (0, 2), (1, 2), (1, 3), (2, 3), (3, 4),
    (4, 5), (5, 6), (5, 7), (6, 8), (7, 8), (8, 9),
]

# (user id, interest) pairs.  The indexes below are keyed by the verbatim
# interest string, so the spelling has to be consistent.  Corrected
# spellings: "machine learning" (user 4), "Haskell" and
# "programming languages" (user 5).
interests = [
    (0, "Hadoop"), (0, "Big Data"), (0, "HBase"), (0, "Java"),
    (0, "Spark"), (0, "Storm"), (0, "Cassandra"),
    (1, "NoSQL"), (1, "MongoDB"), (1, "Cassandra"), (1, "HBase"),
    (1, "Postgres"),
    (2, "Python"), (2, "scikit-learn"), (2, "scipy"),
    (2, "numpy"), (2, "statsmodels"), (2, "pandas"),
    (3, "R"), (3, "Python"),
    (3, "statistics"), (3, "regression"), (3, "probability"),
    (4, "machine learning"), (4, "regression"), (4, "decision trees"),
    (4, "libsvm"),
    (5, "Python"), (5, "R"), (5, "Java"), (5, "C++"),
    (5, "Haskell"), (5, "programming languages"),
    (6, "statistics"), (6, "probability"), (6, "mathematics"), (6, "theory"),
    (7, "machine learning"), (7, "scikit-learn"), (7, "Mahout"),
    (7, "neural networks"),
    (8, "neural networks"), (8, "deep learning"),
    (8, "Big Data"), (8, "artificial intelligence"),
    (9, "Hadoop"), (9, "Java"), (9, "MapReduce"), (9, "Big Data"),
]

# Adjacency list keyed by user id; every user starts with an empty list.
friendships = {user["id"]: [] for user in users}

# Record each friendship pair in both directions.
for i, j in friendship_pairs:
    friendships[i].append(j)
    friendships[j].append(i)


def number_of_friends(user):
    """Return how many friends ``user`` has according to ``friendships``."""
    return len(friendships[user["id"]])


# Two indexes over the same data, filled in a single pass:
#   user_ids_by_interest: interest -> ids of the users who have it
#   interests_by_user_id: user id  -> that user's interests
user_ids_by_interest = defaultdict(list)
interests_by_user_id = defaultdict(list)

for user_id, interest in interests:
    user_ids_by_interest[interest].append(user_id)
    interests_by_user_id[user_id].append(interest)


def most_common_interests_with(user):
    """Count the interests each other user shares with ``user``.

    Returns a ``Counter`` mapping another user's id to the number of
    interests that user has in common with ``user``.  ``user`` is excluded
    from the result, and so is anyone with no shared interest.
    """
    own_id = user["id"]
    return Counter(
        interested_user_id
        for interest in interests_by_user_id[own_id]
        for interested_user_id in user_ids_by_interest[interest]
        if interested_user_id != own_id
    )


print(most_common_interests_with(users[0]))
    

1.3.3 給与と経験値

勤続年数と給与のデータ
# (annual salary, years of tenure) pairs, one record per line.
salaries_and_tenures = [
    (83000, 8.7),
    (88000, 8.1),
    (48000, 0.7),
    (76000, 6),
    (69000, 6.5),
    (76000, 7.5),
    (60000, 2.5),
    (83000, 10),
    (48000, 1.9),
    (63000, 4.2),
]
    

test8.py(勤続年数ごとの平均給与を算出する)
from collections import defaultdict
   
# (annual salary, years of tenure) pairs, one record per line.
salaries_and_tenures = [
    (83000, 8.7),
    (88000, 8.1),
    (48000, 0.7),
    (76000, 6),
    (69000, 6.5),
    (76000, 7.5),
    (60000, 2.5),
    (83000, 10),
    (48000, 1.9),
    (63000, 4.2),
]

# Group the salaries by exact tenure: tenure -> list of salaries.
salary_by_tenure = defaultdict(list)
for pay, years in salaries_and_tenures:
    salary_by_tenure[years].append(pay)

# Collapse every group to its mean: tenure -> average salary.
average_salary_by_tenure = {}
for years, pays in salary_by_tenure.items():
    average_salary_by_tenure[years] = sum(pays) / len(pays)

print(average_salary_by_tenure)
    

test9.py(勤続年数のバケツごとの平均給与を算出する)
from collections import defaultdict
   
# (annual salary, years of tenure) pairs, one record per line.
salaries_and_tenures = [
    (83000, 8.7),
    (88000, 8.1),
    (48000, 0.7),
    (76000, 6),
    (69000, 6.5),
    (76000, 7.5),
    (60000, 2.5),
    (83000, 10),
    (48000, 1.9),
    (63000, 4.2),
]


def tenure_bucket(tenure):
    """Return the label of the tenure bucket that ``tenure`` falls into.

    Tenures below 2 go to the first bucket, tenures from 2 up to (but not
    including) 5 go to the second, and everything else goes to the third.
    """
    if tenure < 2:
        return '2年未満'
    if tenure < 5:
        return '2~5年'
    return '5年以上'


# Group the salaries by bucket label: label -> list of salaries.
salary_by_tenure_bucket = defaultdict(list)
for pay, years in salaries_and_tenures:
    salary_by_tenure_bucket[tenure_bucket(years)].append(pay)

# Collapse every bucket to its mean: label -> average salary.
average_salary_by_bucket = {
    label: sum(pays) / len(pays)
    for label, pays in salary_by_tenure_bucket.items()
}

print(average_salary_by_bucket)
    

1.3.5 興味に関するあれこれ

test10.py(単語のカウント)
from collections import Counter
  
# (user id, interest) pairs.  The word count below is only meaningful when
# the same word is always spelled the same way.  Corrected spellings:
# "machine learning" (user 4), "Haskell" and "programming languages"
# (user 5).
interests = [
    (0, "Hadoop"), (0, "Big Data"), (0, "HBase"), (0, "Java"),
    (0, "Spark"), (0, "Storm"), (0, "Cassandra"),
    (1, "NoSQL"), (1, "MongoDB"), (1, "Cassandra"), (1, "HBase"),
    (1, "Postgres"),
    (2, "Python"), (2, "scikit-learn"), (2, "scipy"),
    (2, "numpy"), (2, "statsmodels"), (2, "pandas"),
    (3, "R"), (3, "Python"),
    (3, "statistics"), (3, "regression"), (3, "probability"),
    (4, "machine learning"), (4, "regression"), (4, "decision trees"),
    (4, "libsvm"),
    (5, "Python"), (5, "R"), (5, "Java"), (5, "C++"),
    (5, "Haskell"), (5, "programming languages"),
    (6, "statistics"), (6, "probability"), (6, "mathematics"), (6, "theory"),
    (7, "machine learning"), (7, "scikit-learn"), (7, "Mahout"),
    (7, "neural networks"),
    (8, "neural networks"), (8, "deep learning"),
    (8, "Big Data"), (8, "artificial intelligence"),
    (9, "Hadoop"), (9, "Java"), (9, "MapReduce"), (9, "Big Data"),
]

# Lower-case every interest, split it on whitespace and tally the words.
# The user id is not needed for the tally, hence the throwaway name.
words_and_counts = Counter(
    word
    for _, interest in interests
    for word in interest.lower().split()
)

# Report only the words that occur more than once, most frequent first.
for word, count in words_and_counts.most_common():
    if count > 1:
        print(word, count)